# Load libraries and data
easypackages::libraries("here","ggplot2","caret","e1071","pheatmap","reshape2","NbClust","grid","patchwork","readxl","patchwork","WGCNA","psych","nlme","reshape2")
source(here("code","ndar_functions.R"))
source(here("code","euaims_functions.R"))
source(here("code","get_ggColorHue.R"))
source(here("code","cohens_d.R"))
source(here("code","Repfunctionspack6.R"))
# Global settings used by the rest of the script
options(stringsAsFactors = FALSE)
# font size for the numbers and labels drawn on the confusion-matrix heatmap
fontSize <- 20
# permutation count for the (commented-out) label-permutation analysis
nperm <- 1
# project directories, resolved with here()
codepath <- here("code")
datapath <- here("data")
figpath <- here("figures")
resultpath <- here("results", "ndar")
plotpath <- here("plots", "ndar")
#' Label subjects by the z-scored difference between SC and RRB severity
#'
#' The difference score is `dbaes_atotal - dbaes_btotal`. It is z-scored with
#' `mean2use` and `sd2use`, and each subject is labelled "SC_over_RRB"
#' (z > z_thresh), "RRB_over_SC" (z < -z_thresh) or "SC_equal_RRB" (otherwise).
#'
#' @param data2use data frame with numeric columns `dbaes_atotal` and
#'   `dbaes_btotal`.
#' @param z_thresh z-score cutoff applied symmetrically around zero.
#' @param mean2use mean used for z-scoring; when NULL it is computed from the
#'   difference scores in `data2use` (ignoring missing values).
#' @param sd2use standard deviation used for z-scoring; when NULL it is
#'   computed from the difference scores in `data2use` (ignoring missing values).
#' @return `data2use` with two added columns: `z_ds` (numeric z-score) and
#'   `z_ds_group` (factor that always has the three levels "RRB_over_SC",
#'   "SC_equal_RRB", "SC_over_RRB"; NA where the z-score is undefined).
make_subtype <- function(data2use, z_thresh, mean2use=NULL, sd2use=NULL){
  # Fixed level set (same order as the alphabetical default) so tables built
  # from different datasets always line up, even if a subtype is absent.
  subtype_levels <- c("RRB_over_SC", "SC_equal_RRB", "SC_over_RRB")

  # compute difference score
  vars2use <- c("dbaes_atotal", "dbaes_btotal")
  diff_score <- data2use[[vars2use[1]]] - data2use[[vars2use[2]]]

  # compute mean and sd if necessary
  if (is.null(mean2use)) {
    mean2use <- mean(diff_score, na.rm = TRUE)
  }
  if (is.null(sd2use)) {
    sd2use <- sd(diff_score, na.rm = TRUE)
  }

  # compute z-score
  z_ds <- (diff_score - mean2use) / sd2use

  # make subtype labels; which() drops NA comparisons
  z_ds_group <- rep("SC_equal_RRB", length(z_ds))
  z_ds_group[which(z_ds > z_thresh)] <- "SC_over_RRB"
  z_ds_group[which(z_ds < -z_thresh)] <- "RRB_over_SC"
  # an undefined z-score must not be reported as "SC_equal_RRB"
  z_ds_group[is.na(z_ds)] <- NA

  data2use$z_ds <- z_ds
  data2use$z_ds_group <- factor(z_ds_group, levels = subtype_levels)
  data2use
} # function make_subtype
# Read the tidy verbal Discovery and Replication tables
Dverbal_Discovery <- read.csv(file.path(datapath, "tidy_verbal_disc.csv"))
Dverbal_Replication <- read.csv(file.path(datapath, "tidy_verbal_rep.csv"))
# the two columns used as clustering / subtyping features throughout
vars2use <- c("dbaes_atotal", "dbaes_btotal")
# index rows by subjectkey so later steps can look subjects up by row name
rownames(Dverbal_Replication) <- Dverbal_Replication$subjectkey
rownames(Dverbal_Discovery) <- Dverbal_Discovery$subjectkey
#------------------------------------------------------------------------------
# add in ADOS
# Read the ADOS CSS tables and copy ADOS age, SA CSS and RRB CSS onto the
# matching subjects of the verbal tables. Subjects without ADOS data stay NA.
ados_Discovery <- read.csv(file.path(datapath, "ndar_ados_css_discovery.csv"))
ados_Replication <- read.csv(file.path(datapath, "ndar_ados_css_replication.csv"))
ados_Discovery$ados_age <- ados_Discovery$interview_age
ados_Replication$ados_age <- ados_Replication$interview_age
Dverbal_Discovery$ados_age <- NA
Dverbal_Discovery$ados_sa_css <- NA
Dverbal_Discovery$ados_rrb_css <- NA
Dverbal_Replication$ados_age <- NA
Dverbal_Replication$ados_sa_css <- NA
Dverbal_Replication$ados_rrb_css <- NA
# Only copy ADOS rows whose subjectkey is an existing row of the verbal table:
# assigning to an unknown row name would append a new, otherwise all-NA subject.
ados_disc_keep <- ados_Discovery$subjectkey %in% rownames(Dverbal_Discovery)
ados_disc_keys <- ados_Discovery$subjectkey[ados_disc_keep]
Dverbal_Discovery[ados_disc_keys, "ados_age"] <- ados_Discovery$interview_age[ados_disc_keep]
Dverbal_Discovery[ados_disc_keys, "ados_sa_css"] <- ados_Discovery$ados_sa_css[ados_disc_keep]
Dverbal_Discovery[ados_disc_keys, "ados_rrb_css"] <- ados_Discovery$ados_rrb_css[ados_disc_keep]
ados_rep_keep <- ados_Replication$subjectkey %in% rownames(Dverbal_Replication)
ados_rep_keys <- ados_Replication$subjectkey[ados_rep_keep]
Dverbal_Replication[ados_rep_keys, "ados_age"] <- ados_Replication$interview_age[ados_rep_keep]
Dverbal_Replication[ados_rep_keys, "ados_sa_css"] <- ados_Replication$ados_sa_css[ados_rep_keep]
Dverbal_Replication[ados_rep_keys, "ados_rrb_css"] <- ados_Replication$ados_rrb_css[ados_rep_keep]
#------------------------------------------------------------------------------
# add in IQ
# Read the IQ tables and copy IQ and the age at IQ assessment onto the matching
# subjects of the verbal tables. Subjects without IQ data stay NA.
iq_Discovery <- read.csv(file.path(datapath, "ndar_iq_discovery.csv"))
iq_Replication <- read.csv(file.path(datapath, "ndar_iq_replication.csv"))
iq_Discovery$iq_age <- iq_Discovery$interview_age
iq_Replication$iq_age <- iq_Replication$interview_age
Dverbal_Discovery$iq_age <- NA
Dverbal_Discovery$iq <- NA
Dverbal_Replication$iq_age <- NA
Dverbal_Replication$iq <- NA
# Only copy IQ rows whose subjectkey is an existing row of the verbal table:
# assigning to an unknown row name would append a new, otherwise all-NA subject.
iq_disc_keep <- iq_Discovery$subjectkey %in% rownames(Dverbal_Discovery)
iq_disc_keys <- iq_Discovery$subjectkey[iq_disc_keep]
Dverbal_Discovery[iq_disc_keys, "iq_age"] <- iq_Discovery$iq_age[iq_disc_keep]
Dverbal_Discovery[iq_disc_keys, "iq"] <- iq_Discovery$IQ[iq_disc_keep]
iq_rep_keep <- iq_Replication$subjectkey %in% rownames(Dverbal_Replication)
iq_rep_keys <- iq_Replication$subjectkey[iq_rep_keep]
Dverbal_Replication[iq_rep_keys, "iq_age"] <- iq_Replication$iq_age[iq_rep_keep]
Dverbal_Replication[iq_rep_keys, "iq"] <- iq_Replication$IQ[iq_rep_keep]
#------------------------------------------------------------------------------
# Subtype using hierarchical clustering and dynamic hybrid tree cut algorithm to find the subtypes
# NDAR Discovery --------------------------------------------------------------
# deep split parameter
# Cluster the Discovery set on the two feature columns, save the clustergram,
# then plot every subject's SC and RRB scores faceted by cluster.
dS <- 0
maxScores <- c(3, 4)
data2use <- Dverbal_Discovery[, vars2use]
# discReorderedItems = colnames(data2use)
fname2save <- file.path(
  plotpath,
  sprintf("clustergram_ADIalgoTotals_verbalDiscovery_euclidean_ward_deepSplit%d.pdf", dS)
)
verbalDiscovery_clustResults <- ClusterData(data2use,
                                            deepSplit = dS,
                                            fname2save = fname2save)
## ..cutHeight not given, setting it to 43.9 ===> 99% of the (truncated) height range in dendro.
## ..done.
# replace the colour names used as cluster labels with numeric labels
oldColors <- c("blue", "brown", "green", "red", "turquoise", "yellow")
newColors <- c("5", "4", "6", "3", "2", "1")
verbalDiscovery_clustResults <- relabelClusters(verbalDiscovery_clustResults, oldColors, newColors)
makeClustergram(verbalDiscovery_clustResults, fname2save = fname2save)
## quartz_off_screen
## 2
# make plot with all individuals shown as lines
df2use <- Dverbal_Discovery[, c("subjectkey", vars2use)]
df2use$subgrp <- factor(verbalDiscovery_clustResults$dynamicColors)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
# df2use$dbaes_atotal = df2use$dbaes_atotal
# df2use$dbaes_btotal = df2use$dbaes_btotal
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject and subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))
# first six of seven hues, one per cluster
colors2use <- get_ggColorHue(7)
colors2use <- colors2use[1:6]
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) + facet_grid(. ~ subgrp)
# guides(colour = "none") replaces the deprecated guides(color = FALSE)
p <- p + geom_point(shape = 1) + geom_line(alpha = 0.2) + guides(colour = "none") + ylim(0, 1)
p <- p + scale_colour_manual(values = colors2use)
p <- p + ylab("Percent Severity") + xlab("ADI-R subscale")
fname2save <- file.path(
  plotpath,
  sprintf("summaryPlot_IndividualSubs_ADIalgoTotals_verbalDiscovery_euclidean_ward_deepSplit%d.pdf", dS)
)
# pass the plot explicitly instead of relying on last_plot()
ggsave(filename = fname2save, plot = p)
p
# scatterplot
p_disc <- ggplot(data = df2use, aes(x = SC, y = RRB, colour = factor(subgrp))) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ggtitle("NDAR Discovery") +
  ylim(0, 1) +
  xlim(0, 1) +
  scale_colour_manual(values = colors2use)
p_disc
# NDAR Replication ------------------------------------------------------------
# deep split parameter
# Cluster the Replication set on the two feature columns, save the clustergram,
# then plot every subject's SC and RRB scores faceted by cluster.
dS <- 0
maxScores <- c(3, 4)
data2use <- Dverbal_Replication[, vars2use]
# discReorderedItems = colnames(data2use)
fname2save <- file.path(
  plotpath,
  sprintf("clustergram_ADIalgoTotals_verbalReplication_euclidean_ward_deepSplit%d.pdf", dS)
)
verbalReplication_clustResults <- ClusterData(data2use,
                                              deepSplit = dS,
                                              fname2save = fname2save)
## ..cutHeight not given, setting it to 42.9 ===> 99% of the (truncated) height range in dendro.
## ..done.
# replace the colour names used as cluster labels with numeric labels
oldColors <- c("black", "blue", "brown", "green", "red", "turquoise", "yellow")
newColors <- c("7", "1", "4", "3", "2", "6", "5")
verbalReplication_clustResults <- relabelClusters(verbalReplication_clustResults, oldColors, newColors)
makeClustergram(verbalReplication_clustResults, fname2save = fname2save)
## quartz_off_screen
## 2
# make plot with all individuals shown as lines
df2use <- Dverbal_Replication[, c("subjectkey", vars2use)]
df2use$subgrp <- factor(verbalReplication_clustResults$dynamicColors)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
# df2use$dbaes_atotal = df2use$dbaes_atotal
# df2use$dbaes_btotal = df2use$dbaes_btotal
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject and subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) + facet_grid(. ~ subgrp)
# guides(colour = "none") replaces the deprecated guides(color = FALSE)
p <- p + geom_point(shape = 1) + geom_line(alpha = 0.2) + guides(colour = "none") + ylim(0, 1)
# p = p + scale_colour_manual(values = colors2use)
p <- p + ylab("Percent Severity") + xlab("ADI-R subscale")
fname2save <- file.path(
  plotpath,
  sprintf("summaryPlot_IndividualSubs_ADIalgoTotals_verbalReplication_euclidean_ward_deepSplit%d.pdf", dS)
)
# pass the plot explicitly instead of relying on last_plot()
ggsave(filename = fname2save, plot = p)
p
# scatterplot
p_rep <- ggplot(data = df2use, aes(x = SC, y = RRB, colour = factor(subgrp))) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ggtitle("NDAR Replication") +
  ylim(0, 1) +
  xlim(0, 1)
p_rep
# Subtype using hierarchical agglomerative clustering, looking for k=3
# Will use SC, RRB and the difference score as features
# how many clusters do you want?
# number of clusters to cut each tree into
nclusters <- 3

# --- Discovery: features are SC, RRB and their difference score ---
ds <- Dverbal_Discovery[, vars2use[1]] - Dverbal_Discovery[, vars2use[2]]
# Euclidean distances between subjects
distmat <- dist(x = cbind(Dverbal_Discovery[, vars2use], ds), method = "euclidean")
# agglomerative clustering with Ward linkage
disc_tree <- hclust(d = distmat, method = "ward.D2")
# cut the tree and store the cluster membership
treecut_res <- cutree(tree = disc_tree, k = nclusters)
Dverbal_Discovery$hc_subgrp <- treecut_res

# --- Replication: same steps (ds, distmat, disc_tree, treecut_res are reused) ---
ds <- Dverbal_Replication[, vars2use[1]] - Dverbal_Replication[, vars2use[2]]
# Euclidean distances between subjects
distmat <- dist(x = cbind(Dverbal_Replication[, vars2use], ds), method = "euclidean")
# agglomerative clustering with Ward linkage
disc_tree <- hclust(d = distmat, method = "ward.D2")
# cut the tree and store the cluster membership
treecut_res <- cutree(tree = disc_tree, k = nclusters)
Dverbal_Replication$hc_subgrp <- treecut_res
# Plot Discovery dataset after using hierarchical agglomerative clustering
# Discovery set
# Plot the k=3 hierarchical clustering solution for the Discovery set
maxScores <- c(3, 4)
# Discovery - make plot with all individuals shown as lines
# NOTE(review): adi_total_vars2use is not defined in the visible part of this
# script; presumably it comes from a sourced file and includes dbaes_atotal
# and dbaes_btotal (both are read below) - confirm.
df2use <- Dverbal_Discovery[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(Dverbal_Discovery$hc_subgrp)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject and subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) + facet_grid(. ~ subgrp)
# guides(colour = "none") replaces the deprecated guides(color = FALSE)
p <- p + geom_point(shape = 1) + geom_line(alpha = 0.2) + guides(colour = "none")
p <- p + ylab("Percent Severity") + xlab("ADI-R subscale")
p
# scatterplot
p_disc <- ggplot(data = Dverbal_Discovery,
                 aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(hc_subgrp))) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ggtitle("NDAR Discovery")
p_disc
# cluster sizes
table(Dverbal_Discovery$hc_subgrp)
##
## 1 2 3
## 339 225 325
# Plot Replication dataset after using hierarchical agglomerative clustering
# Plot the k=3 hierarchical clustering solution for the Replication set
maxScores <- c(3, 4)
# Replication - make plot with all individuals shown as lines
# NOTE(review): adi_total_vars2use is not defined in the visible part of this
# script; presumably it comes from a sourced file and includes dbaes_atotal
# and dbaes_btotal (both are read below) - confirm.
df2use <- Dverbal_Replication[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(Dverbal_Replication$hc_subgrp)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject and subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) + facet_grid(. ~ subgrp)
# guides(colour = "none") replaces the deprecated guides(color = FALSE)
p <- p + geom_point(shape = 1) + geom_line(alpha = 0.2) + guides(colour = "none")
p <- p + ylab("Percent Severity") + xlab("ADI-R subscale")
p
# scatterplot
p_rep <- ggplot(data = Dverbal_Replication,
                aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(hc_subgrp))) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ggtitle("NDAR Replication")
p_rep
# cluster sizes
table(Dverbal_Replication$hc_subgrp)
##
## 1 2 3
## 357 248 285
# Subtype using hierarchical agglomerative clustering, but use NbClust to find optimal number of clusters
# Discovery: let NbClust choose the number of Ward clusters from the two features
nbc_disc_res <- NbClust(
  data = Dverbal_Discovery[, vars2use],
  method = "ward.D2"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 6 proposed 2 as the best number of clusters
## * 3 proposed 3 as the best number of clusters
## * 7 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 1 proposed 6 as the best number of clusters
## * 1 proposed 8 as the best number of clusters
## * 1 proposed 14 as the best number of clusters
## * 3 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
# Replication: let NbClust choose the number of Ward clusters from the two features
nbc_rep_res <- NbClust(
  data = Dverbal_Replication[, vars2use],
  method = "ward.D2"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 5 proposed 2 as the best number of clusters
## * 6 proposed 3 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 5 proposed 7 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 1 proposed 13 as the best number of clusters
## * 3 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
# Plot Discovery dataset after using hierarchical agglomerative clustering and NbClust
# Discovery set
# Plot the NbClust partition for the Discovery set
maxScores <- c(3, 4)
# Discovery - make plot with all individuals shown as lines
# NOTE(review): adi_total_vars2use is not defined in the visible part of this
# script; presumably it comes from a sourced file and includes dbaes_atotal
# and dbaes_btotal (both are read below) - confirm.
df2use <- Dverbal_Discovery[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(nbc_disc_res$Best.partition)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject and subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) + facet_grid(. ~ subgrp)
# guides(colour = "none") replaces the deprecated guides(color = FALSE)
p <- p + geom_point(shape = 1) + geom_line(alpha = 0.2) + guides(colour = "none")
p <- p + ylab("Percent Severity") + xlab("ADI-R subscale")
p
# scatterplot
Dverbal_Discovery$nbclust_subgrp <- factor(nbc_disc_res$Best.partition)
p_disc <- ggplot(data = Dverbal_Discovery,
                 aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(nbclust_subgrp))) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ggtitle("NDAR Discovery")
p_disc
# cluster sizes
table(Dverbal_Discovery$nbclust_subgrp)
##
## 1 2 3 4
## 149 369 171 200
# Plot Replication dataset after using hierarchical agglomerative clustering
# Plot the NbClust partition for the Replication set
maxScores <- c(3, 4)
# Replication - make plot with all individuals shown as lines
# NOTE(review): adi_total_vars2use is not defined in the visible part of this
# script; presumably it comes from a sourced file and includes dbaes_atotal
# and dbaes_btotal (both are read below) - confirm.
df2use <- Dverbal_Replication[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(nbc_rep_res$Best.partition)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject and subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) + facet_grid(. ~ subgrp)
# guides(colour = "none") replaces the deprecated guides(color = FALSE)
p <- p + geom_point(shape = 1) + geom_line(alpha = 0.2) + guides(colour = "none")
p <- p + ylab("Percent Severity") + xlab("ADI-R subscale")
p
# scatterplot
Dverbal_Replication$nbclust_subgrp <- factor(nbc_rep_res$Best.partition)
p_rep <- ggplot(data = Dverbal_Replication,
                aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(nbclust_subgrp))) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ggtitle("NDAR Replication")
p_rep
# cluster sizes
table(Dverbal_Replication$nbclust_subgrp)
##
## 1 2 3
## 450 353 87
#------------------------------------------------------------------------------
# Subtyping using Z-score of the difference between SC and RRB
# Z-score threshold to use for subtyping
# Each dataset is z-scored against its own mean and sd of the SC - RRB
# difference score; subjects beyond +/- z_thresh get a directional subtype.
z_thresh <- 0.5
# vars2use = c("dbaes_atotal","dbaes_btotal")

# Discovery: norms from the Discovery difference scores
ds_disc <- Dverbal_Discovery[, vars2use[1]] - Dverbal_Discovery[, vars2use[2]]
mean2use <- mean(ds_disc)
sd2use <- sd(ds_disc)
Dverbal_Discovery <- make_subtype(
  data2use = Dverbal_Discovery,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)

# Replication: norms from the Replication difference scores
ds_rep <- Dverbal_Replication[, vars2use[1]] - Dverbal_Replication[, vars2use[2]]
mean2use <- mean(ds_rep)
sd2use <- sd(ds_rep)
Dverbal_Replication <- make_subtype(
  data2use = Dverbal_Replication,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#------------------------------------------------------------------------------
# Table of subtypes X sex
# Discovery
# Discovery: cross-tabulate subtype by sex
table(Dverbal_Discovery$z_ds_group, Dverbal_Discovery$sex)
##
## F M
## RRB_over_SC 55 227
## SC_equal_RRB 82 256
## SC_over_RRB 60 209
# chi-squared test of independence between subtype and sex
cs_res <- chisq.test(Dverbal_Discovery$z_ds_group, Dverbal_Discovery$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Discovery$z_ds_group and Dverbal_Discovery$sex
## X-squared = 2.0214, df = 2, p-value = 0.364
# Replication
# Replication: cross-tabulate subtype by sex
table(Dverbal_Replication$z_ds_group, Dverbal_Replication$sex)
##
## F M
## RRB_over_SC 51 214
## SC_equal_RRB 93 281
## SC_over_RRB 52 199
# chi-squared test of independence between subtype and sex
cs_res <- chisq.test(Dverbal_Replication$z_ds_group, Dverbal_Replication$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Replication$z_ds_group and Dverbal_Replication$sex
## X-squared = 3.2006, df = 2, p-value = 0.2018
#------------------------------------------------------------------------------
# Descriptive stats
# Discovery
# Discovery: per-subtype descriptive statistics (psych::describeBy)
df2use <- Dverbal_Discovery[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# interview_age divided by 12 (months to years)
df2use$age <- df2use$interview_age / 12
cols2use <- c(
  "z_ds_group", "sex", "age", "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 282 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 282 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 282 9.89 4.20 9.33 9.55 3.71 2.00 27.17 25.17
## ados_age 4 39 94.54 47.03 86.00 92.00 57.82 27.00 202.00 175.00
## ados_sa_css 5 39 6.49 2.28 7.00 6.55 2.97 2.00 10.00 8.00
## ados_rrb_css 6 39 7.87 1.89 8.00 8.09 1.48 1.00 10.00 9.00
## iq 7 76 100.91 19.83 102.00 102.69 17.79 42.00 139.00 97.00
## dbaes_atotal 8 282 0.23 0.11 0.22 0.23 0.12 0.00 0.51 0.51
## dbaes_btotal 9 282 0.45 0.13 0.45 0.45 0.13 0.14 0.79 0.65
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 0.88 1.18 0.25
## ados_age 0.45 -0.92 7.53
## ados_sa_css -0.22 -1.04 0.37
## ados_rrb_css -1.34 2.55 0.30
## iq -0.89 0.85 2.27
## dbaes_atotal 0.09 -0.65 0.01
## dbaes_btotal 0.10 -0.36 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 338 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 338 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 338 9.31 5.95 8.25 8.46 4.82 0 45.75 45.75 2.08
## ados_age 4 54 77.09 40.60 60.50 72.30 34.84 33 182.00 149.00 0.85
## ados_sa_css 5 54 6.83 2.10 7.00 6.98 1.48 1 10.00 9.00 -0.58
## ados_rrb_css 6 54 7.65 2.34 8.00 8.05 1.48 1 10.00 9.00 -1.58
## iq 7 86 106.20 15.96 107.00 106.57 17.79 64 138.00 74.00 -0.22
## dbaes_atotal 8 338 0.29 0.13 0.29 0.30 0.14 0 0.67 0.67 -0.02
## dbaes_btotal 9 338 0.30 0.13 0.30 0.31 0.13 0 0.68 0.68 -0.10
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 6.83 0.32
## ados_age -0.37 5.52
## ados_sa_css 0.12 0.29
## ados_rrb_css 2.13 0.32
## iq -0.46 1.72
## dbaes_atotal -0.31 0.01
## dbaes_btotal -0.16 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 269 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 269 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 269 7.39 5.02 5.92 6.52 2.97 1.67 37.33 35.67
## ados_age 4 60 72.73 37.62 66.00 67.00 29.65 30.00 172.00 142.00
## ados_sa_css 5 60 7.33 1.69 7.00 7.35 1.48 4.00 10.00 6.00
## ados_rrb_css 6 60 7.88 2.10 8.00 8.21 1.48 1.00 10.00 9.00
## iq 7 37 104.81 18.05 111.00 105.77 13.34 40.00 140.00 100.00
## dbaes_atotal 8 269 0.45 0.14 0.46 0.45 0.14 0.11 0.87 0.76
## dbaes_btotal 9 269 0.24 0.11 0.23 0.23 0.11 0.00 0.57 0.57
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.31 7.14 0.31
## ados_age 1.18 0.51 4.86
## ados_sa_css -0.07 -0.88 0.22
## ados_rrb_css -1.63 3.19 0.27
## iq -1.08 2.46 2.97
## dbaes_atotal 0.13 -0.12 0.01
## dbaes_btotal 0.25 -0.25 0.01
# Replication
# Replication: per-subtype descriptive statistics (psych::describeBy)
df2use <- Dverbal_Replication[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# interview_age divided by 12 (months to years)
df2use$age <- df2use$interview_age / 12
cols2use <- c(
  "z_ds_group", "sex", "age", "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 265 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 265 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 265 9.80 5.00 9.17 9.23 4.08 2.58 30.67 28.08
## ados_age 4 29 85.28 46.93 76.00 80.64 51.89 36.00 196.00 160.00
## ados_sa_css 5 29 6.86 2.00 7.00 6.92 1.48 3.00 10.00 7.00
## ados_rrb_css 6 29 7.48 1.99 8.00 7.64 1.48 1.00 10.00 9.00
## iq 7 77 102.53 18.34 104.00 103.24 16.31 57.00 152.00 95.00
## dbaes_atotal 8 265 0.23 0.11 0.23 0.23 0.11 0.01 0.61 0.60
## dbaes_btotal 9 265 0.47 0.13 0.46 0.46 0.11 0.15 0.93 0.78
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 1.40 2.77 0.31
## ados_age 0.84 -0.30 8.71
## ados_sa_css -0.21 -0.83 0.37
## ados_rrb_css -1.10 1.62 0.37
## iq -0.27 0.41 2.09
## dbaes_atotal 0.57 0.64 0.01
## dbaes_btotal 0.43 0.70 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 374 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 374 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 374 8.89 5.05 8.25 8.22 4.45 0 33.83 33.83 1.59
## ados_age 4 66 81.97 39.90 77.00 78.04 47.44 35 188.00 153.00 0.65
## ados_sa_css 5 66 6.85 2.14 7.00 6.91 2.97 2 10.00 8.00 -0.13
## ados_rrb_css 6 66 7.11 2.60 8.00 7.46 1.48 1 10.00 9.00 -1.18
## iq 7 72 108.38 15.57 108.00 107.78 14.08 69 146.00 77.00 0.29
## dbaes_atotal 8 374 0.30 0.14 0.30 0.30 0.14 0 0.74 0.74 0.02
## dbaes_btotal 9 374 0.32 0.14 0.31 0.32 0.14 0 0.81 0.81 0.08
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 3.92 0.26
## ados_age -0.51 4.91
## ados_sa_css -0.88 0.26
## ados_rrb_css 0.56 0.32
## iq -0.12 1.84
## dbaes_atotal -0.23 0.01
## dbaes_btotal -0.02 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 251 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 251 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 251 7.95 6.06 6.08 6.73 3.21 0.00 40.92 40.92
## ados_age 4 58 72.14 28.95 66.50 69.04 25.20 30.00 141.00 111.00
## ados_sa_css 5 58 7.03 1.86 7.00 7.06 1.48 3.00 10.00 7.00
## ados_rrb_css 6 58 7.60 2.05 8.00 7.79 1.48 1.00 10.00 9.00
## iq 7 37 110.92 17.82 111.00 111.68 17.79 62.00 146.00 84.00
## dbaes_atotal 8 251 0.47 0.13 0.46 0.46 0.14 0.14 0.96 0.82
## dbaes_btotal 9 251 0.25 0.12 0.25 0.25 0.12 0.00 0.66 0.66
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.21 5.71 0.38
## ados_age 0.99 0.19 3.80
## ados_sa_css 0.03 -0.78 0.24
## ados_rrb_css -1.09 1.45 0.27
## iq -0.52 -0.12 2.93
## dbaes_atotal 0.41 0.30 0.01
## dbaes_btotal 0.14 -0.11 0.01
#------------------------------------------------------------------------------
# Tests of differences in interview age across the subtypes
# Discovery
# one-way ANOVA of interview_age on subtype (Discovery)
mod2use <- lm(formula = interview_age ~ z_ds_group,
              data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 136192 68096 17.714 2.861e-08 ***
## Residuals 886 3405874 3844
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication
# one-way ANOVA of interview_age on subtype (Replication)
mod2use <- lm(formula = interview_age ~ z_ds_group,
              data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 63457 31728 7.7318 0.0004689 ***
## Residuals 887 3639907 4104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Tests of differences in SC across the subtypes
# Discovery
# one-way ANOVA of dbaes_atotal (SC) on subtype (Discovery)
mod2use <- lm(formula = dbaes_atotal ~ z_ds_group,
              data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.1174 3.5587 210.86 < 2.2e-16 ***
## Residuals 886 14.9533 0.0169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication
# one-way ANOVA of dbaes_atotal (SC) on subtype (Replication)
mod2use <- lm(formula = dbaes_atotal ~ z_ds_group,
              data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.5809 3.7904 225.12 < 2.2e-16 ***
## Residuals 887 14.9345 0.0168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Tests of differences in RRB across the subtypes
# Discovery
# one-way ANOVA of dbaes_btotal (RRB) on subtype (Discovery)
mod2use <- lm(formula = dbaes_btotal ~ z_ds_group,
              data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.6165 3.3083 213.82 < 2.2e-16 ***
## Residuals 886 13.7083 0.0155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication
# one-way ANOVA of dbaes_btotal (RRB) on subtype (Replication)
mod2use <- lm(formula = dbaes_btotal ~ z_ds_group,
              data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.6364 3.3182 187.56 < 2.2e-16 ***
## Residuals 887 15.6922 0.0177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Tests of differences in ADOS SA CSS across the subtypes
# Discovery
# one-way ANOVA of ados_sa_css on subtype (Discovery)
mod2use <- lm(formula = ados_sa_css ~ z_ds_group,
              data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 17.89 8.9468 2.2346 0.1106
## Residuals 150 600.58 4.0038
# Replication
# one-way ANOVA of ados_sa_css on subtype (Replication)
mod2use <- lm(formula = ados_sa_css ~ z_ds_group,
              data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1.19 0.5973 0.1479 0.8627
## Residuals 150 605.86 4.0391
#------------------------------------------------------------------------------
# Tests of differences in ADOS RRB CSS across the subtypes
# Discovery
# one-way ANOVA of ados_rrb_css on subtype (Discovery)
mod2use <- lm(formula = ados_rrb_css ~ z_ds_group,
              data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1.86 0.9309 0.2033 0.8163
## Residuals 150 686.86 4.5790
# Replication
# one-way ANOVA of ados_rrb_css on subtype (Replication)
mod2use <- lm(formula = ados_rrb_css ~ z_ds_group,
              data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 8.13 4.0625 0.772 0.4639
## Residuals 150 789.38 5.2625
#------------------------------------------------------------------------------
# Tests of differences in IQ across the subtypes
# Discovery
# one-way ANOVA of iq on subtype (Discovery)
mod2use <- lm(formula = iq ~ z_ds_group,
              data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1165 582.52 1.8162 0.1654
## Residuals 196 62864 320.73
# Replication
# one-way ANOVA of iq on subtype (Replication)
mod2use <- lm(formula = iq ~ z_ds_group,
              data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 2187 1093.72 3.6915 0.02681 *
## Residuals 183 54219 296.28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Make scatterplots with difference score Z subtypes in different colors
# Discovery: SC vs RRB scatterplot coloured by z-score subtype
maxScores <- c(3, 4)
p_disc <- ggplot(data = Dverbal_Discovery,
                 aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR Discovery")
# saved version without the legend; guides(colour = "none") replaces the
# deprecated guides(colour = FALSE)
p1_top_left <- p_disc + guides(colour = "none")
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Disc_scatterplot_z%s.pdf", as.character(z_thresh))),
       plot = p1_top_left)
p_disc
# subtype sizes
table(Dverbal_Discovery$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 282 338 269
# correlation between SC and RRB
cor_res <- cor.test(Dverbal_Discovery$dbaes_atotal, Dverbal_Discovery$dbaes_btotal)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Discovery$dbaes_atotal and Dverbal_Discovery$dbaes_btotal
## t = 6.6829, df = 887, p-value = 4.134e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1554332 0.2806578
## sample estimates:
## cor
## 0.2189469
# Replication: SC vs RRB scatterplot coloured by z-score subtype
p_rep <- ggplot(data = Dverbal_Replication,
                aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR Replication")
# saved version without the legend; guides(colour = "none") replaces the
# deprecated guides(colour = FALSE)
p2_bottom_left <- p_rep + guides(colour = "none")
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Rep_scatterplot_z%s.pdf", as.character(z_thresh))),
       plot = p2_bottom_left)
p_rep
# subtype sizes
table(Dverbal_Replication$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 265 374 251
# correlation between SC and RRB
cor_res <- cor.test(Dverbal_Replication$dbaes_atotal, Dverbal_Replication$dbaes_btotal)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Replication$dbaes_atotal and Dverbal_Replication$dbaes_btotal
## t = 7.1459, df = 888, p-value = 1.864e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1700824 0.2943934
## sample estimates:
## cor
## 0.2331903
#------------------------------------------------------------------------------
# Run supervised model with Discovery as training and Replication as Test
# run validation
# make subtypes using z-scores computed from the mean and sd of the training set
# Validation: compare the Replication labels obtained from its own norms
# ("actual") with the labels obtained from the Discovery norms ("predicted").
train_data <- Dverbal_Discovery
test_data <- Dverbal_Replication
# fixed level order so the confusion matrix is always 3x3 and aligned
subtype_levels <- c("RRB_over_SC", "SC_equal_RRB", "SC_over_RRB")

# training-set norms of the SC - RRB difference score (computed once)
train_diff <- train_data[, vars2use[1]] - train_data[, vars2use[2]]
train_mean <- mean(train_diff)
train_sd <- sd(train_diff)
tmp_train <- make_subtype(data2use = train_data,
                          z_thresh = z_thresh,
                          mean2use = train_mean,
                          sd2use = train_sd)

# test-set norms give the "actual" test labels
test_diff <- test_data[, vars2use[1]] - test_data[, vars2use[2]]
tmp_test <- make_subtype(data2use = test_data,
                         z_thresh = z_thresh,
                         mean2use = mean(test_diff),
                         sd2use = sd(test_diff))
#==============================================================================
#==============================================================================
# make labels based on train mean and sd
# mean2use / sd2use are left holding the training values, as before
mean2use <- train_mean
sd2use <- train_sd
pred_labels <- make_subtype(data2use = test_data,
                            z_thresh = z_thresh,
                            mean2use = train_mean,
                            sd2use = train_sd)
# rows = actual labels, columns = predicted labels
confmat <- table(factor(tmp_test$z_ds_group, levels = subtype_levels),
                 factor(pred_labels$z_ds_group, levels = subtype_levels))
# accuracy = agreeing subjects (diagonal) over all test subjects
acc <- sum(diag(confmat)) / nrow(pred_labels)
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#==============================================================================
#==============================================================================
# plot confusion matrix
# Rows of confmat are the actual labels (first argument to table()) and columns
# are the predicted labels; cell colour is the percentage within each row,
# cell text is the raw count.
# The hook pushes a smaller viewport on the page pheatmap opens, leaving room
# for the axis titles drawn afterwards with grid.text().
setHook(
  "grid.newpage",
  function() pushViewport(viewport(x = 1, y = 1, width = 0.9, height = 0.9, name = "vp", just = c("right", "top"))),
  action = "prepend"
)
pheatmap(
  confmat / rowSums(confmat) * 100,
  display_numbers = confmat,
  color = colorRampPalette(c("white", "red"))(100),
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize_number = fontSize,
  fontsize_row = fontSize,
  fontsize_col = fontSize,
  labels_row = c("RRB>SC", "SC=RRB", "SC>RRB"),
  labels_col = c("RRB>SC", "SC=RRB", "SC>RRB"),
  angle_col = 90,
  # pheatmap expects one more break than colours: 100 colours -> 101 breaks
  breaks = seq(0, 100, length.out = 101)
)
setHook("grid.newpage", NULL, "replace")
# Title under the heatmap describes the columns (predicted); the rotated title
# on the left describes the rows (actual).
grid::grid.text("Predicted Labels", y = -0.07, gp = gpar(fontsize = fontSize))
grid::grid.text("Actual Labels", x = -0.07, rot = 90, gp = gpar(fontsize = fontSize))
# show accuracy
true_accuracy <- acc
true_accuracy
## [1] 0.9775281
#================================================================================
#================================================================================
# #------------------------------------------------------------------------------
# # Permute subtype labels to examine how well supervised model performs
#
# # nperm = 10000
#
# # make subtypes using z-scores computed from the mean and sd of the training set
# train_data = Dverbal_Discovery
# test_data = Dverbal_Replication
# mean2use = mean(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# sd2use = sd(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# tmp_train = make_subtype(data2use = train_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# mean2use = mean(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# sd2use = sd(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# tmp_test = make_subtype(data2use = test_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# acc = vector(length = nperm)
# for (iperm in 1:nperm){
# # set seed for reproducibility
# set.seed(iperm)
#
# sc_perm = sample(train_data[,vars2use[1]])
# rrb_perm = sample(train_data[,vars2use[2]])
# perm_mean2use = mean(sc_perm - rrb_perm)
# perm_sd2use = sd(sc_perm - rrb_perm)
# # perm_mean2use = mean(train_data[,vars2use[1]] - rrb_perm)
# # perm_sd2use = sd(train_data[,vars2use[1]] - rrb_perm)
# pred_labels = make_subtype(data2use = tmp_test,
# z_thresh = z_thresh,
# mean2use = perm_mean2use,
# sd2use = perm_sd2use)
# confmat = table(tmp_test$z_ds_group,pred_labels$z_ds_group)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/dim(pred_labels)[1]
#
# # compute model
# permuted_labels = sample(tmp_train$z_ds_group)
# mod2use = svm(x = tmp_train[,vars2use], y = permuted_labels)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc[iperm] = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
# #============================================================================
# } # for (iperm in 1:nperm)
#
# df2plot = data.frame(Accuracy = acc)
# p = ggplot(data = df2plot, aes(x = Accuracy)) + geom_histogram() + geom_vline(xintercept=true_accuracy)
# p
#
# # compute p-value
# pval = sum(c(true_accuracy,acc)>=true_accuracy)/(nperm+1)
# pval
#================================================================================
#================================================================================
#------------------------------------------------------------------------------
# Plot difference score Z subtypes in Discovery set
maxScores <- c(3, 4)

# Discovery - make plot with all individuals shown as lines
# assumes adi_total_vars2use (defined elsewhere) includes dbaes_atotal and
# dbaes_btotal, and that tmp_train rows are in Dverbal_Discovery order - confirm
df2use <- Dverbal_Discovery[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(tmp_train$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal

# Long format: one row per participant per subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)

# One facet per subtype; each participant is a line joining their SC and RRB
# scores. guides(colour = FALSE) is deprecated (ggplot2 >= 3.3.4), so the
# legend is dropped once with "none".
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")

# p already has no legend, so the saved panel is the same plot
p3_middle_top <- p
ggsave(
  filename = file.path(
    plotpath,
    sprintf("final_NDAR_Disc_jitterplot_z%s.pdf", as.character(z_thresh))
  ),
  plot = p3_middle_top
)
p
# Plot difference score Z subtypes in Replication set
maxScores <- c(3, 4)

# Replication - make plot with all individuals shown as lines
# assumes adi_total_vars2use (defined elsewhere) includes dbaes_atotal and
# dbaes_btotal, and that tmp_test rows are in Dverbal_Replication order - confirm
df2use <- Dverbal_Replication[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(tmp_test$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal

# Long format: one row per participant per subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)

# One facet per subtype; each participant is a line joining their SC and RRB
# scores. guides(colour = FALSE) is deprecated (ggplot2 >= 3.3.4), so the
# legend is dropped once with "none".
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")

# p already has no legend, so the saved panel is the same plot
p4_middle_bottom <- p
ggsave(
  filename = file.path(
    plotpath,
    sprintf("final_NDAR_Rep_jitterplot_z%s.pdf", as.character(z_thresh))
  ),
  plot = p4_middle_bottom
)
p
#------------------------------------------------------------------------------
# Apply NDAR subtypes to EU-AIMS LEAP data
# SC and RRB for EU-AIMS are built below as the mean of the A1-A3 and B1-B4
# *_pct_severity columns. The NDAR columns in Dverbal are used unchanged
# (presumably already on the same scale - confirm).
Dverbal <- read.csv(file.path(datapath, "tidy_verbal.csv"))
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

# Keep ASD participants that have all seven percent-severity scores
pct_items <- c(
  "A1_pct_severity", "A2_pct_severity", "A3_pct_severity",
  "B1_pct_severity", "B2_pct_severity", "B3_pct_severity", "B4_pct_severity"
)
mask1 <- euaims_data$Diagnosis == "ASD"
mask2 <- !complete.cases(euaims_data[, pct_items])  # TRUE if any item is NA
euaims_data <- subset(euaims_data, (mask1 & !mask2))

# SC = mean of the three A items, RRB = mean of the four B items
euaims_data[, vars2use[1]] <- (euaims_data$A1_pct_severity +
  euaims_data$A2_pct_severity +
  euaims_data$A3_pct_severity) / 3
euaims_data[, vars2use[2]] <- (euaims_data$B1_pct_severity +
  euaims_data$B2_pct_severity +
  euaims_data$B3_pct_severity +
  euaims_data$B4_pct_severity) / 4

# Norms come from the whole NDAR sample and are applied to both datasets
train_data <- Dverbal
test_data <- euaims_data
mean2use <- mean(train_data[, vars2use[1]] - train_data[, vars2use[2]], na.rm = TRUE)
sd2use <- sd(train_data[, vars2use[1]] - train_data[, vars2use[2]], na.rm = TRUE)
c(mean2use, sd2use)
## [1] -0.01045243 0.19482749
# NOTE(review): if Dverbal has rows with a missing difference score (na.rm is
# used above), make_subtype leaves them in its default "SC_equal_RRB" group -
# confirm that is intended.
tmp_train <- make_subtype(
  data2use = train_data,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
tmp_test <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#===========================================================================
#===========================================================================
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#
# tmp_test$svm_pred_labels = pred_labels
#
# # plot confusion matrix
# setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
# pheatmap(confmat/rowSums(confmat)*100, display_numbers = confmat, color = colorRampPalette(c('white','red'))(100), cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = fontSize, fontsize_row = fontSize, fontsize_col = fontSize,labels_row = c("RRB>SC","SC=RRB","SC>RRB"),labels_col = c("RRB>SC","SC=RRB","SC>RRB"),angle_col=90,
# breaks= seq(0,100, length=100))
# setHook("grid.newpage", NULL, "replace")
# grid::grid.text("Actual Labels", y=-0.07, gp=gpar(fontsize=fontSize))
# grid::grid.text("Predicted Labels", x=-0.07, rot=90, gp=gpar(fontsize=fontSize))
#===========================================================================
#===========================================================================
# Scatterplots of SC vs RRB coloured by subtype: all of NDAR first, then
# EU-AIMS labelled with the NDAR-derived groups
p1 <- ggplot(
  data = tmp_train,
  mapping = aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 0.8) +
  xlim(0, 0.8) +
  ggtitle("NDAR ALL")
p1

p2 <- ggplot(
  data = tmp_test,
  mapping = aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 0.8) +
  xlim(0, 0.8) +
  ggtitle("EU-AIMS with Groups from NDAR ALL")
p2
#===========================================================================
#===========================================================================
# p3 = ggplot(data = tmp_test, aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(pred_labels))) + geom_point() + xlab("SC") + ylab("RRB") + ylim(0,0.8) + xlim(0,0.8) + ggtitle("EU-AIMS")
# p5_bottom_right = p3 + guides(colour=FALSE)
# ggsave(filename = file.path(plotpath, sprintf("final_EUAIMS_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p5_bottom_right)
# p3
#===========================================================================
#===========================================================================
# Save the EU-AIMS LEAP data together with the subgroup labels derived from
# the NDAR norms
write.csv(
  tmp_test,
  file.path(
    datapath,
    sprintf("tidy_euaims_NDAR_subtypes_diffscore_z%s.csv", as.character(z_thresh))
  )
)
#===========================================================================
#===========================================================================
# # Make final plot
# p_final = p1_top_left + p3_middle_top + plot_spacer() + p2_bottom_left + p4_middle_bottom + p5_bottom_right + plot_layout(nrow=3, ncol=3, widths = c(4,4,4), heights = c(8,8,8))
# ggsave(filename = file.path(plotpath, sprintf("final_NDAR_EUAIMS_subtypes_plot_z%s.pdf",as.character(z_thresh))), plot = p_final)
# p_final
#===========================================================================
#===========================================================================
#------------------------------------------------------------------------------
# Integrate subtypes with rest of EU-AIMS LEAP data
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

# TD participants get NA severity scores and the subgroup label "TD".
# NOTE(review): columns 2:6 are selected by position; for the rbind below they
# must be the first five names in cols2use (subid, age, Centre, Schedule,
# Diagnosis) - confirm against tidy_euaims.csv.
td_df <- subset(euaims_data, euaims_data$Diagnosis == "TD", select = 2:6)
td_df$A_pct_severity <- as.numeric(NA)
td_df$B_pct_severity <- as.numeric(NA)
td_df$subgrp <- "TD"
#===========================================================================
#===========================================================================
# cols2use = c("subid","age","Centre","Schedule","Diagnosis","dbaes_atotal","dbaes_btotal","svm_pred_labels")
cols2use <- c("subid", "age", "Centre", "Schedule", "Diagnosis", "dbaes_atotal", "dbaes_btotal", "z_ds_group")
#===========================================================================
#===========================================================================
# ASD participants: rename the score and label columns to match td_df.
# Renaming by name (not position) keeps this correct if cols2use is reordered.
tmp_asd <- tmp_test[, cols2use]
old_names <- c("dbaes_atotal", "dbaes_btotal", "z_ds_group")
new_names <- c("A_pct_severity", "B_pct_severity", "subgrp")
names(tmp_asd)[match(old_names, names(tmp_asd))] <- new_names
asd_df <- tmp_asd

# Preprocessing sheet; only rows whose notes column equals "ok" are kept.
# NOTE(review): absolute, machine-specific path - this will not resolve on
# another computer; consider moving the file under datapath.
fname <- "/Users/mlombardo/Dropbox/euaims/data/rsfmri_preproc/euaims_preproc.xlsx"
pp_data <- read_excel(fname)
mask <- pp_data$notes == "ok"
pp_data <- subset(pp_data, mask)

# Attach sex from the preprocessing sheet; the merge also drops participants
# that are not in pp_data. (The earlier rbind of the unmerged td_df/asd_df was
# overwritten here before being used, so it has been removed.)
asd_df <- merge(pp_data[, c("subid", "sex")], asd_df, by = "subid")
td_df <- merge(pp_data[, c("subid", "sex")], td_df, by = "subid")
all_data <- rbind(td_df, asd_df)

# Columns present in both tables get .x (from pp_data) and .y (from all_data)
# suffixes; pick one copy of age and sex under the plain names.
data2write <- merge(pp_data, all_data, by = "subid")
data2write$age <- data2write$age.x
data2write$sex <- data2write$sex.y
# Subgroup counts broken down by schedule, by site, and by sex
print(table(data2write[["Schedule"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## A 9 26 45 78
## B 7 28 50 83
## C 7 22 34 59
## D 1 6 31 23
print(table(data2write[["Centre"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## CAMBRIDGE 6 22 14 29
## KINGS_COLLEGE 12 28 66 78
## MANNHEIM 0 0 0 34
## NIJMEGEN 6 26 61 64
## UTRECHT 0 6 19 38
print(table(data2write[["sex"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Female 7 21 43 88
## Male 17 61 117 155
# DSM-5 - find best split that balances participants across sites
a <- findBestSplit(asd_df, seed_range = 172342) #,300001:500000))
## [1] 172342
# Seeds whose discrepancy equals the smallest non-missing discrepancy
lowest_discrepancy <- min(a$discrepancy, na.rm = TRUE)
best_seeds <- a$seed[which(a$discrepancy == lowest_discrepancy)]
print(best_seeds)
## [1] 172342
# Split datasets -------------------------------------------------------------
# Use the first of the best seeds for every split below
rngSeed <- best_seeds[1]
# Split the ASD participants of each schedule (A-D) separately, all with the
# same seed. Element 2 of the SplitDatasetsBySex result is used as Discovery
# and element 1 as Replication; the halves are stored as <schedule>_Discovery
# and <schedule>_Replication.
for (sched in c("A", "B", "C", "D")) {
  dset2use <- subset(asd_df, asd_df$Schedule == sched)
  tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
  assign(paste0(sched, "_Discovery"), tmp_d[[2]])
  assign(paste0(sched, "_Replication"), tmp_d[[1]])
}

# Stack the per-schedule halves into the full Discovery and Replication sets
df_Disc <- rbind(A_Discovery, B_Discovery, C_Discovery, D_Discovery)
df_Rep <- rbind(A_Replication, B_Replication, C_Replication, D_Replication)
# Schedule-by-site counts in the Discovery half
a <- table(df_Disc$Schedule, df_Disc$Centre)
print(a)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 6 17 10 7
## B 9 16 14 3
## C 6 9 14 2
## D 0 10 10 0
# Schedule-by-site counts in the Replication half
b <- table(df_Rep$Schedule, df_Rep$Centre)
print(b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 7 18 10 5
## B 8 17 13 5
## C 6 10 13 3
## D 0 9 9 0
# Cell-wise difference between the two halves
print(a - b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A -1 -1 0 2
## B 1 -1 1 -2
## C 0 -1 1 -1
## D 0 1 1 0
# Total absolute imbalance summed over all cells
print(sum(abs(a - b)))
## [1] 14
# Record which half each ASD participant fell into; rows matched by neither
# split keep NA here
data2write$dataset <- NA
mask <- data2write$subid %in% df_Disc$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% df_Rep$subid
data2write$dataset[mask] <- "Replication"

# ASD-only subsets of each half
asd_Disc <- subset(data2write, dataset == "Discovery" & Diagnosis == "ASD")
asd_Rep <- subset(data2write, dataset == "Replication" & Diagnosis == "ASD")
# # find which seed gives best TD age-match -------------------------------------
# seeds = 1:1000
# pvals = data.frame(matrix(nrow = length(seeds), ncol = 2))
# for (i in 1:length(seeds)) {
# res = findTDAgeMatch(data2write, seed_range = c(seeds[i],seeds[i]))
# td_Disc_matched = res[[2]]
# td_Rep_matched = res[[1]]
# tres = t.test(td_Disc_matched$age, asd_Disc$age)
# pvals[i,1] = tres$p.value
# tres = t.test(td_Rep_matched$age, asd_Rep$age)
# pvals[i,2] = tres$p.value
# #print(i)
# }
# a = sort.int(pvals[,1], decreasing = TRUE, index.return = TRUE)
# b=pvals[a$ix,]
# Age-match TD participants to each half using a fixed seed. Element 2 of the
# findTDAgeMatch result is used as Discovery and element 1 as Replication.
seed2use <- 929
res <- findTDAgeMatch(data2write, seed_range = rep(seed2use, 2))
td_Disc_matched <- res[[2]]
td_Rep_matched <- res[[1]]

# Label the matched TD participants with their half
mask <- data2write$subid %in% td_Disc_matched$subid
data2write[mask, "dataset"] <- "Discovery"
mask <- data2write$subid %in% td_Rep_matched$subid
data2write[mask, "dataset"] <- "Replication"
print(table(data2write[["dataset"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Discovery 17 38 78 121
## Replication 7 44 82 122
# Write the combined table to the project root
fname2save <- here(
  sprintf("asd_subgrp_data_rsfmri_ALL_DSM5_diffzscoreGrps_z%s.csv", as.character(z_thresh))
)
write.csv(data2write, file = fname2save)
#------------------------------------------------------------------------------
# Descriptive stats
# Measures summarised per subgroup and dataset
measure_vars <- c(
  "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore", "SRS_tscore_self", "RBS_total", "SSP_total",
  "vabsdscoresc_dss", "vabsdscoresd_dss", "vabsdscoress_dss",
  "vabsabcabc_standard"
)
id_vars <- c("subid", "Diagnosis", "dataset", "Centre", "meanFD", "age", "sex", "subgrp")
df2use <- data2write[, c(id_vars, measure_vars)]

# Any cell equal to 999 or 777 is set to NA (presumably missing-data codes -
# confirm); this applies to every selected column
mask <- df2use == 999 | df2use == 777
df2use[mask] <- NA

# age is divided by 365 (presumably days to years - confirm)
df2use$age <- df2use$age / 365

cols2use <- c("dataset", "subgrp", "age", "meanFD", measure_vars)
res <- describeBy(x = df2use[, cols2use], group = c("subgrp", "dataset"))
res
##
## Descriptive statistics by group
## subgrp: RRB_over_SC
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 17 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 17 NaN NA NA NaN NA Inf -Inf
## age 3 17 16.57 6.24 15.83 16.69 9.67 7.08 24.29
## meanFD 4 17 0.25 0.24 0.18 0.20 0.05 0.06 1.14
## viq_all 5 17 97.45 15.81 96.00 97.32 20.76 73.00 123.85
## piq_all 6 17 96.82 17.03 96.00 96.87 16.31 64.00 129.00
## fsiq4_all 7 17 96.90 15.72 96.00 97.49 20.76 67.00 118.01
## A_pct_severity 8 17 0.21 0.14 0.15 0.21 0.15 0.00 0.49
## B_pct_severity 9 17 0.41 0.14 0.42 0.41 0.16 0.13 0.61
## ADI_social_total 10 17 16.29 7.69 16.00 16.60 8.90 2.00 26.00
## ADI_communication_total 11 17 14.29 6.72 15.00 14.47 7.41 2.00 24.00
## ADI_RRB_total 12 17 7.29 2.23 7.00 7.40 2.97 3.00 10.00
## ados_2_SA_CSS 13 17 4.76 2.93 3.00 4.73 2.97 1.00 9.00
## ados_2_RRB_CSS 14 17 4.76 3.83 5.00 4.67 5.93 1.00 10.00
## SRS_tscore 15 12 74.33 10.35 74.00 74.40 12.60 58.00 90.00
## SRS_tscore_self 16 7 61.29 6.13 63.00 61.29 2.97 49.00 68.00
## RBS_total 17 11 17.45 9.82 19.00 17.00 14.83 6.00 33.00
## SSP_total 18 7 139.00 20.08 140.00 139.00 28.17 116.00 167.00
## vabsdscoresc_dss 19 13 72.77 21.44 77.00 72.91 14.83 29.00 115.00
## vabsdscoresd_dss 20 13 65.62 13.04 68.00 67.18 8.90 31.00 83.00
## vabsdscoress_dss 21 13 67.38 13.88 69.00 68.55 16.31 35.00 87.00
## vabsabcabc_standard 22 13 70.15 9.17 70.00 69.64 10.38 58.00 88.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 17.20 -0.10 -1.68 1.51
## meanFD 1.09 2.88 7.83 0.06
## viq_all 50.85 -0.11 -1.38 3.84
## piq_all 65.00 -0.27 -0.55 4.13
## fsiq4_all 51.01 -0.28 -1.21 3.81
## A_pct_severity 0.49 0.38 -1.15 0.03
## B_pct_severity 0.48 -0.31 -1.15 0.03
## ADI_social_total 24.00 -0.26 -1.41 1.86
## ADI_communication_total 22.00 -0.22 -1.19 1.63
## ADI_RRB_total 7.00 -0.41 -1.03 0.54
## ados_2_SA_CSS 8.00 0.23 -1.73 0.71
## ados_2_RRB_CSS 9.00 0.11 -1.90 0.93
## SRS_tscore 32.00 0.11 -1.46 2.99
## SRS_tscore_self 19.00 -0.92 -0.46 2.32
## RBS_total 27.00 0.21 -1.60 2.96
## SSP_total 51.00 0.13 -1.88 7.59
## vabsdscoresc_dss 86.00 -0.30 -0.05 5.95
## vabsdscoresd_dss 52.00 -1.17 1.20 3.62
## vabsdscoress_dss 52.00 -0.69 -0.17 3.85
## vabsabcabc_standard 30.00 0.24 -1.05 2.54
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 38 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 38 NaN NA NA NaN NA Inf -Inf
## age 3 38 16.43 6.01 14.92 16.02 4.75 7.75 30.28
## meanFD 4 38 0.36 0.63 0.24 0.25 0.17 0.04 3.95
## viq_all 5 38 97.98 18.61 102.00 97.87 16.29 64.00 136.00
## piq_all 6 38 101.00 19.23 104.50 101.52 17.03 61.00 142.00
## fsiq4_all 7 38 99.39 17.45 105.00 99.91 20.02 60.00 131.00
## A_pct_severity 8 38 0.32 0.14 0.31 0.31 0.13 0.03 0.63
## B_pct_severity 9 38 0.33 0.15 0.31 0.32 0.13 0.02 0.69
## ADI_social_total 10 38 17.55 7.28 19.00 17.94 6.67 3.00 27.00
## ADI_communication_total 11 38 14.42 6.13 14.00 14.50 6.67 0.00 26.00
## ADI_RRB_total 12 38 5.63 2.40 6.00 5.62 2.22 1.00 12.00
## ados_2_SA_CSS 13 37 6.38 2.56 7.00 6.48 2.97 1.00 10.00
## ados_2_RRB_CSS 14 37 5.24 2.55 6.00 5.32 1.48 1.00 10.00
## SRS_tscore 15 36 71.42 11.97 74.00 71.67 11.86 47.00 90.00
## SRS_tscore_self 16 22 61.86 12.11 61.50 61.39 14.08 43.00 89.00
## RBS_total 17 34 17.06 13.32 15.00 15.43 10.38 0.00 53.00
## SSP_total 18 23 134.35 31.32 137.00 134.58 44.48 81.00 187.00
## vabsdscoresc_dss 19 37 72.24 18.80 75.00 73.10 11.86 21.00 122.00
## vabsdscoresd_dss 20 36 72.81 15.98 72.50 73.50 11.86 25.00 105.00
## vabsdscoress_dss 21 37 71.59 16.35 74.00 73.13 11.86 20.00 95.00
## vabsabcabc_standard 22 36 70.03 14.39 72.00 71.17 9.64 20.00 100.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 22.53 0.64 -0.62 0.98
## meanFD 3.91 4.98 25.67 0.10
## viq_all 72.00 -0.15 -0.77 3.02
## piq_all 81.00 -0.33 -0.45 3.12
## fsiq4_all 71.00 -0.29 -0.74 2.83
## A_pct_severity 0.60 0.18 -0.44 0.02
## B_pct_severity 0.67 0.40 -0.37 0.02
## ADI_social_total 24.00 -0.51 -0.97 1.18
## ADI_communication_total 26.00 -0.18 -0.67 0.99
## ADI_RRB_total 11.00 0.07 -0.16 0.39
## ados_2_SA_CSS 9.00 -0.30 -0.97 0.42
## ados_2_RRB_CSS 9.00 -0.56 -0.77 0.42
## SRS_tscore 43.00 -0.29 -1.05 1.99
## SRS_tscore_self 46.00 0.24 -0.92 2.58
## RBS_total 53.00 1.11 0.75 2.28
## SSP_total 106.00 -0.08 -1.16 6.53
## vabsdscoresc_dss 101.00 -0.37 1.24 3.09
## vabsdscoresd_dss 80.00 -0.51 0.85 2.66
## vabsdscoress_dss 75.00 -1.09 1.28 2.69
## vabsabcabc_standard 80.00 -1.12 2.65 2.40
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 78 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 78 NaN NA NA NaN NA Inf -Inf
## age 3 78 16.28 5.14 15.96 16.12 5.11 7.56 29.40
## meanFD 4 78 0.22 0.21 0.15 0.18 0.10 0.03 1.08
## viq_all 5 76 97.81 19.26 99.00 97.59 19.27 61.00 142.00
## piq_all 6 76 99.49 22.56 102.40 100.25 21.34 52.43 150.00
## fsiq4_all 7 78 99.02 19.54 102.25 99.52 19.60 59.00 143.00
## A_pct_severity 8 78 0.42 0.14 0.43 0.41 0.13 0.12 0.82
## B_pct_severity 9 78 0.18 0.11 0.16 0.17 0.12 0.00 0.46
## ADI_social_total 10 78 17.60 6.32 18.00 17.91 7.41 3.00 28.00
## ADI_communication_total 11 78 14.13 5.03 14.50 14.28 5.19 2.00 24.00
## ADI_RRB_total 12 78 3.27 2.20 3.00 3.14 1.48 0.00 10.00
## ados_2_SA_CSS 13 76 6.34 2.55 7.00 6.48 2.97 1.00 10.00
## ados_2_RRB_CSS 14 76 4.59 2.77 5.00 4.50 2.97 1.00 10.00
## SRS_tscore 15 66 72.73 12.13 74.00 73.28 14.08 44.00 95.00
## SRS_tscore_self 16 31 62.94 12.19 61.00 61.72 10.38 42.00 94.00
## RBS_total 17 65 17.20 16.86 15.00 14.58 16.31 0.00 90.00
## SSP_total 18 49 138.29 30.55 139.00 139.22 37.06 53.00 189.00
## vabsdscoresc_dss 19 73 72.53 15.99 72.00 73.24 11.86 21.00 107.00
## vabsdscoresd_dss 20 73 72.11 17.06 73.00 71.68 14.83 17.00 131.00
## vabsdscoress_dss 21 73 68.18 15.73 69.00 69.37 13.34 20.00 104.00
## vabsabcabc_standard 22 73 68.81 14.97 71.00 69.54 10.38 6.00 103.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.85 0.29 -0.48 0.58
## meanFD 1.05 2.28 5.30 0.02
## viq_all 81.00 0.09 -0.57 2.21
## piq_all 97.57 -0.29 -0.55 2.59
## fsiq4_all 84.00 -0.26 -0.84 2.21
## A_pct_severity 0.70 0.23 -0.22 0.02
## B_pct_severity 0.46 0.44 -0.53 0.01
## ADI_social_total 25.00 -0.35 -0.84 0.72
## ADI_communication_total 22.00 -0.22 -0.69 0.57
## ADI_RRB_total 10.00 0.63 -0.01 0.25
## ados_2_SA_CSS 9.00 -0.49 -0.86 0.29
## ados_2_RRB_CSS 9.00 -0.15 -1.21 0.32
## SRS_tscore 51.00 -0.29 -0.52 1.49
## SRS_tscore_self 52.00 0.82 0.34 2.19
## RBS_total 90.00 1.75 4.17 2.09
## SSP_total 136.00 -0.36 -0.40 4.36
## vabsdscoresc_dss 86.00 -0.77 2.14 1.87
## vabsdscoresd_dss 114.00 0.22 2.19 2.00
## vabsdscoress_dss 84.00 -0.82 1.15 1.84
## vabsabcabc_standard 97.00 -1.25 4.41 1.75
## ------------------------------------------------------------
## subgrp: TD
## dataset: Discovery
## vars n mean sd median trimmed mad min
## dataset* 1 121 NaN NA NA NaN NA Inf
## subgrp* 2 121 NaN NA NA NaN NA Inf
## age 3 121 16.83 5.23 16.65 16.73 5.69 7.22
## meanFD 4 121 0.18 0.15 0.13 0.15 0.07 0.03
## viq_all 5 119 104.52 19.70 105.00 105.03 19.27 46.00
## piq_all 6 119 106.10 19.47 107.00 107.53 17.79 49.00
## fsiq4_all 7 119 105.72 18.33 108.18 106.99 16.58 53.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf
## SRS_tscore 15 68 47.84 9.40 45.00 46.32 5.19 37.00
## SRS_tscore_self 16 71 46.69 4.85 46.00 46.26 4.45 39.00
## RBS_total 17 68 2.15 4.74 0.00 0.95 0.00 0.00
## SSP_total 18 59 177.86 12.71 182.00 179.78 8.90 122.00
## vabsdscoresc_dss 19 34 91.97 25.44 99.50 93.50 21.50 21.00
## vabsdscoresd_dss 20 34 90.74 20.28 98.50 92.25 17.05 33.00
## vabsdscoress_dss 21 34 96.21 23.67 102.50 98.57 21.50 33.00
## vabsabcabc_standard 22 34 92.06 23.04 99.50 93.89 15.57 25.00
## max range skew kurtosis se
## dataset* -Inf -Inf NA NA NA
## subgrp* -Inf -Inf NA NA NA
## age 29.84 22.62 0.17 -0.51 0.48
## meanFD 0.85 0.82 2.28 5.65 0.01
## viq_all 160.00 114.00 -0.24 0.38 1.81
## piq_all 147.00 98.00 -0.69 0.32 1.79
## fsiq4_all 142.00 89.00 -0.69 0.58 1.68
## A_pct_severity -Inf -Inf NA NA NA
## B_pct_severity -Inf -Inf NA NA NA
## ADI_social_total -Inf -Inf NA NA NA
## ADI_communication_total -Inf -Inf NA NA NA
## ADI_RRB_total -Inf -Inf NA NA NA
## ados_2_SA_CSS -Inf -Inf NA NA NA
## ados_2_RRB_CSS -Inf -Inf NA NA NA
## SRS_tscore 76.00 39.00 1.54 1.44 1.14
## SRS_tscore_self 63.00 24.00 0.93 1.10 0.58
## RBS_total 27.00 27.00 3.13 10.87 0.57
## SSP_total 190.00 68.00 -1.90 4.85 1.66
## vabsdscoresc_dss 138.00 117.00 -0.71 0.36 4.36
## vabsdscoresd_dss 121.00 88.00 -0.80 0.07 3.48
## vabsdscoress_dss 129.00 96.00 -0.83 -0.31 4.06
## vabsabcabc_standard 127.00 102.00 -0.90 0.30 3.95
## ------------------------------------------------------------
## subgrp: RRB_over_SC
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 7 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 7 NaN NA NA NaN NA Inf -Inf
## age 3 7 13.79 3.76 12.61 13.79 3.32 8.31 19.56
## meanFD 4 7 0.28 0.23 0.20 0.28 0.13 0.10 0.76
## viq_all 5 7 106.49 17.25 102.73 106.49 7.82 91.00 143.00
## piq_all 6 7 107.26 20.78 101.00 107.26 14.77 89.00 148.00
## fsiq4_all 7 7 107.43 18.50 104.00 107.43 7.84 93.00 148.00
## A_pct_severity 8 7 0.15 0.07 0.16 0.15 0.02 0.04 0.27
## B_pct_severity 9 7 0.32 0.09 0.30 0.32 0.08 0.18 0.45
## ADI_social_total 10 7 14.71 5.22 16.00 14.71 4.45 5.00 20.00
## ADI_communication_total 11 7 8.57 3.46 9.00 8.57 2.97 3.00 13.00
## ADI_RRB_total 12 7 4.71 1.98 5.00 4.71 1.48 1.00 7.00
## ados_2_SA_CSS 13 7 5.14 2.79 5.00 5.14 4.45 1.00 8.00
## ados_2_RRB_CSS 14 7 4.57 2.57 5.00 4.57 2.97 1.00 7.00
## SRS_tscore 15 6 68.00 11.82 72.00 68.00 8.90 48.00 79.00
## SRS_tscore_self 16 2 64.50 3.54 64.50 64.50 3.71 62.00 67.00
## RBS_total 17 5 14.60 10.01 13.00 14.60 8.90 5.00 30.00
## SSP_total 18 4 138.50 11.15 137.50 138.50 9.64 126.00 153.00
## vabsdscoresc_dss 19 7 82.29 18.51 74.00 82.29 7.41 68.00 117.00
## vabsdscoresd_dss 20 7 73.43 6.60 71.00 73.43 4.45 66.00 85.00
## vabsdscoress_dss 21 7 83.00 9.56 82.00 83.00 8.90 67.00 95.00
## vabsabcabc_standard 22 7 72.71 17.02 77.00 72.71 5.93 39.00 94.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 11.26 0.14 -1.43 1.42
## meanFD 0.66 1.15 -0.24 0.09
## viq_all 52.00 1.19 0.00 6.52
## piq_all 59.00 0.94 -0.72 7.85
## fsiq4_all 55.00 1.40 0.38 6.99
## A_pct_severity 0.23 0.06 -0.93 0.03
## B_pct_severity 0.27 0.07 -1.49 0.03
## ADI_social_total 15.00 -0.68 -1.03 1.97
## ADI_communication_total 10.00 -0.27 -1.53 1.31
## ADI_RRB_total 6.00 -0.66 -0.90 0.75
## ados_2_SA_CSS 7.00 -0.32 -1.74 1.06
## ados_2_RRB_CSS 6.00 -0.50 -1.70 0.97
## SRS_tscore 31.00 -0.64 -1.41 4.82
## SRS_tscore_self 5.00 0.00 -2.75 2.50
## RBS_total 25.00 0.47 -1.64 4.48
## SSP_total 27.00 0.19 -1.89 5.58
## vabsdscoresc_dss 49.00 0.89 -1.03 7.00
## vabsdscoresd_dss 19.00 0.56 -1.28 2.50
## vabsdscoress_dss 28.00 -0.31 -1.36 3.61
## vabsabcabc_standard 55.00 -0.81 -0.44 6.43
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 44 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 44 NaN NA NA NaN NA Inf -Inf
## age 3 44 16.88 6.45 16.42 16.59 7.37 7.12 30.15
## meanFD 4 44 0.21 0.13 0.18 0.19 0.10 0.06 0.62
## viq_all 5 44 102.27 16.10 103.00 102.86 18.98 70.00 133.00
## piq_all 6 44 106.73 16.97 106.50 107.35 19.27 66.42 134.00
## fsiq4_all 7 44 104.78 16.10 106.91 105.38 17.18 69.00 130.00
## A_pct_severity 8 44 0.25 0.12 0.23 0.24 0.11 0.04 0.65
## B_pct_severity 9 44 0.25 0.14 0.23 0.24 0.12 0.00 0.67
## ADI_social_total 10 44 14.52 6.73 14.50 14.67 7.41 1.00 27.00
## ADI_communication_total 11 44 11.07 5.33 11.00 11.14 5.93 0.00 21.00
## ADI_RRB_total 12 44 4.11 2.53 4.00 4.08 2.97 0.00 9.00
## ados_2_SA_CSS 13 43 5.47 2.55 6.00 5.51 2.97 1.00 10.00
## ados_2_RRB_CSS 14 43 4.74 2.47 5.00 4.71 1.48 1.00 9.00
## SRS_tscore 15 39 64.90 11.78 62.00 64.58 11.86 43.00 90.00
## SRS_tscore_self 16 23 60.74 8.11 61.00 60.74 7.41 46.00 79.00
## RBS_total 17 38 14.18 11.93 11.50 12.78 11.12 0.00 52.00
## SSP_total 18 25 142.72 28.08 148.00 145.19 28.17 69.00 177.00
## vabsdscoresc_dss 19 41 82.41 14.55 81.00 82.15 14.83 50.00 122.00
## vabsdscoresd_dss 20 41 79.88 16.29 79.00 79.21 14.83 38.00 119.00
## vabsdscoress_dss 21 41 77.95 15.31 80.00 78.82 16.31 30.00 101.00
## vabsabcabc_standard 22 41 79.32 13.31 78.00 78.82 10.38 48.00 117.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.03 0.31 -1.01 0.97
## meanFD 0.56 1.24 1.15 0.02
## viq_all 63.00 -0.22 -0.91 2.43
## piq_all 67.58 -0.33 -0.69 2.56
## fsiq4_all 61.00 -0.28 -0.87 2.43
## A_pct_severity 0.61 0.72 1.12 0.02
## B_pct_severity 0.67 0.74 0.48 0.02
## ADI_social_total 26.00 -0.13 -0.93 1.01
## ADI_communication_total 21.00 -0.09 -0.82 0.80
## ADI_RRB_total 9.00 0.09 -0.81 0.38
## ados_2_SA_CSS 9.00 -0.17 -0.98 0.39
## ados_2_RRB_CSS 8.00 -0.37 -0.86 0.38
## SRS_tscore 47.00 0.24 -0.93 1.89
## SRS_tscore_self 33.00 0.06 -0.51 1.69
## RBS_total 52.00 1.26 1.64 1.94
## SSP_total 108.00 -0.82 -0.26 5.62
## vabsdscoresc_dss 72.00 0.29 0.10 2.27
## vabsdscoresd_dss 81.00 0.23 0.16 2.54
## vabsdscoress_dss 71.00 -0.61 0.61 2.39
## vabsabcabc_standard 69.00 0.43 0.33 2.08
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 82 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 82 NaN NA NA NaN NA Inf -Inf
## age 3 82 16.36 5.02 15.93 16.20 5.75 7.48 29.23
## meanFD 4 82 0.26 0.30 0.16 0.19 0.11 0.04 1.60
## viq_all 5 78 96.85 19.43 99.00 97.91 20.76 50.91 130.00
## piq_all 6 80 97.92 21.49 101.50 99.48 20.02 44.03 138.00
## fsiq4_all 7 79 97.98 19.63 103.00 98.81 19.81 59.00 139.00
## A_pct_severity 8 82 0.44 0.15 0.43 0.44 0.17 0.13 0.75
## B_pct_severity 9 82 0.20 0.11 0.21 0.19 0.14 0.00 0.47
## ADI_social_total 10 82 17.88 5.74 19.00 18.06 5.93 4.00 29.00
## ADI_communication_total 11 82 14.72 5.02 15.00 14.85 5.93 3.00 24.00
## ADI_RRB_total 12 82 3.78 2.34 3.00 3.62 1.48 0.00 10.00
## ados_2_SA_CSS 13 78 6.14 2.73 6.00 6.22 2.97 1.00 10.00
## ados_2_RRB_CSS 14 78 4.74 2.77 5.00 4.67 2.97 1.00 10.00
## SRS_tscore 15 73 73.48 11.33 73.00 74.03 13.34 48.00 90.00
## SRS_tscore_self 16 38 62.92 9.59 61.50 62.72 5.93 40.00 84.00
## RBS_total 17 72 17.53 14.53 13.00 15.76 11.12 0.00 73.00
## SSP_total 18 54 138.17 25.37 138.50 138.57 28.17 90.00 184.00
## vabsdscoresc_dss 19 73 74.45 14.87 75.00 74.81 10.38 21.00 110.00
## vabsdscoresd_dss 20 72 71.62 15.97 69.00 71.21 15.57 42.00 118.00
## vabsdscoress_dss 21 73 66.56 15.93 68.00 67.00 13.34 23.00 112.00
## vabsabcabc_standard 22 72 69.07 13.42 69.00 69.47 11.12 28.00 107.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.75 0.33 -0.56 0.55
## meanFD 1.55 3.13 10.11 0.03
## viq_all 79.09 -0.47 -0.56 2.20
## piq_all 93.97 -0.55 -0.58 2.40
## fsiq4_all 80.00 -0.38 -0.82 2.21
## A_pct_severity 0.62 0.08 -0.89 0.02
## B_pct_severity 0.47 0.27 -0.81 0.01
## ADI_social_total 25.00 -0.32 -0.61 0.63
## ADI_communication_total 21.00 -0.24 -0.83 0.55
## ADI_RRB_total 10.00 0.56 -0.37 0.26
## ados_2_SA_CSS 9.00 -0.14 -1.12 0.31
## ados_2_RRB_CSS 9.00 -0.21 -1.18 0.31
## SRS_tscore 42.00 -0.29 -0.91 1.33
## SRS_tscore_self 44.00 0.24 0.11 1.56
## RBS_total 73.00 1.28 1.65 1.71
## SSP_total 94.00 -0.14 -0.93 3.45
## vabsdscoresc_dss 89.00 -0.61 2.14 1.74
## vabsdscoresd_dss 76.00 0.41 0.01 1.88
## vabsdscoress_dss 89.00 -0.23 0.54 1.86
## vabsabcabc_standard 79.00 -0.35 1.26 1.58
## ------------------------------------------------------------
## subgrp: TD
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 122 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 122 NaN NA NA NaN NA Inf -Inf
## age 3 122 16.86 6.07 16.34 16.58 7.59 6.89 29.72
## meanFD 4 122 0.23 0.46 0.14 0.15 0.07 0.04 4.60
## viq_all 5 122 104.02 17.58 108.18 105.64 12.13 45.00 140.00
## piq_all 6 122 104.64 18.41 108.96 106.56 14.08 49.00 139.00
## fsiq4_all 7 122 104.94 17.14 108.09 107.29 11.86 50.00 134.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf -Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf -Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf -Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf -Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf -Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf -Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf -Inf
## SRS_tscore 15 65 47.23 9.34 44.00 45.66 4.45 37.00 90.00
## SRS_tscore_self 16 61 48.44 6.84 47.00 47.63 5.93 39.00 69.00
## RBS_total 17 63 3.08 11.54 0.00 0.86 0.00 0.00 89.00
## SSP_total 18 54 174.93 19.38 182.00 178.41 6.67 75.00 190.00
## vabsdscoresc_dss 19 39 92.74 25.45 96.00 95.82 20.76 21.00 125.00
## vabsdscoresd_dss 20 39 91.10 22.65 97.00 93.91 14.83 27.00 122.00
## vabsdscoress_dss 21 39 98.90 27.04 103.00 102.12 17.79 20.00 132.00
## vabsabcabc_standard 22 38 93.00 25.39 100.00 96.44 15.57 20.00 126.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 22.83 0.33 -0.97 0.55
## meanFD 4.56 7.54 64.83 0.04
## viq_all 95.00 -0.99 1.51 1.59
## piq_all 90.00 -0.93 0.67 1.67
## fsiq4_all 84.00 -1.28 1.67 1.55
## A_pct_severity -Inf NA NA NA
## B_pct_severity -Inf NA NA NA
## ADI_social_total -Inf NA NA NA
## ADI_communication_total -Inf NA NA NA
## ADI_RRB_total -Inf NA NA NA
## ados_2_SA_CSS -Inf NA NA NA
## ados_2_RRB_CSS -Inf NA NA NA
## SRS_tscore 53.00 2.14 5.82 1.16
## SRS_tscore_self 30.00 1.05 0.48 0.88
## RBS_total 89.00 6.60 45.89 1.45
## SSP_total 115.00 -2.88 10.92 2.64
## vabsdscoresc_dss 104.00 -1.21 1.00 4.08
## vabsdscoresd_dss 95.00 -1.34 1.26 3.63
## vabsdscoress_dss 112.00 -1.26 1.10 4.33
## vabsabcabc_standard 106.00 -1.42 1.40 4.12
#------------------------------------------------------------------------------
# Subgroup-by-sex contingency table and chi-squared test of independence
# Discovery rows of data2write only (rows whose dataset is NA are dropped)
data2use <- data2write[which(data2write$dataset == "Discovery"), , drop = FALSE]
table(data2use[["subgrp"]], data2use[["sex"]])
##
## Female Male
## RRB_over_SC 6 11
## SC_equal_RRB 9 29
## SC_over_RRB 20 58
## TD 41 80
# the `$` form is kept here so the printed "data:" line of the test is unchanged
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
print(cs_res)
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 2.5268, df = 3, p-value = 0.4705
# Same table and test for the Replication rows of data2write
data2use <- data2write[which(data2write$dataset == "Replication"), , drop = FALSE]
table(data2use[["subgrp"]], data2use[["sex"]])
##
## Female Male
## RRB_over_SC 1 6
## SC_equal_RRB 12 32
## SC_over_RRB 23 59
## TD 47 75
# the `$` form is kept here so the printed "data:" line of the test is unchanged
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
print(cs_res)
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.3621, df = 3, p-value = 0.2249
#------------------------------------------------------------------------------
# Variables to compare across subgroups (column names in df2use)
vars2analyze <- c(
  "age", "meanFD", "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore_self", "RBS_total", "SSP_total",
  "vabsdscoress_dss", "vabsdscoresd_dss", "vabsdscoresc_dss", "vabsabcabc_standard"
)
# Axis labels used when plotting; position i labels vars2analyze[i]
vnames <- c(
  "Age", "Mean FD", "VIQ", "PIQ", "FIQ",
  "ADI-R SC", "ADI-R RRB",
  "ADI-R Social", "ADI-R Communication", "ADI-R RRB",
  "ADOS SA CSS", "ADOS RRB CSS",
  "SRS", "RBS", "SSP",
  "Vineland Socialization", "Vineland Daily Living Skills", "Vineland Communication", "Vineland ABC"
)
# Statistics collected per variable (columns of output_res)
cnames <- c(
  "All_Disc.fstat", "All_Disc.pval", "All_Rep.fstat", "All_Rep.pval",
  "SCequalRRB_vs_SCoverRRB_Disc.fstat", "SCequalRRB_vs_SCoverRRB_Disc.tstat",
  "SCequalRRB_vs_SCoverRRB_Disc.pval", "SCequalRRB_vs_SCoverRRB_Disc.es",
  "SCequalRRB_vs_SCoverRRB_Rep.fstat", "SCequalRRB_vs_SCoverRRB_Rep.tstat",
  "SCequalRRB_vs_SCoverRRB_Rep.pval", "SCequalRRB_vs_SCoverRRB_Rep.es",
  "SCequalRRB_vs_SCoverRRB.repBF"
)
# Empty (all-NA) results table: one row per variable, one column per statistic,
# plus a varNames column repeating the row names
output_res <- data.frame(matrix(NA, nrow = length(vars2analyze), ncol = length(cnames)))
dimnames(output_res) <- list(vars2analyze, cnames)
output_res[["varNames"]] <- vars2analyze
# Phenotypic comparisons across subgroups, one variable at a time.
# For each entry of vars2analyze this loop
#   1) tests the overall subgrp effect in Discovery and in Replication,
#   2) tests SC_equal_RRB vs SC_over_RRB in each dataset (F, t, p, effect size),
#   3) computes a replication Bayes factor for that two-group contrast,
#   4) draws a jitter + box plot of the variable by subgroup, faceted by dataset.
# Every model is an lme fit with subgrp as the only fixed effect and a random
# intercept per Centre; rows with missing values are dropped (na.omit).
# Results are written into output_res, one row per variable.
# NOTE: the SCequalRRB_vs_SCoverRRB.repBF values recorded in the printed
# output_res further down were produced before trep was corrected (see the
# Bayes factor step) and must be regenerated.

# loop-invariant pieces: random-effects formula, plot colours, plot data
rx_form <- as.formula(sprintf("~ 1|%s","Centre"))
colors2use <- get_ggColorHue(3)
df4plot <- subset(df2use, df2use$subgrp!="RRB_over_SC")

for (ivar in seq_along(vars2analyze)){
  y_var <- vars2analyze[ivar]
  # print(y_var)
  # fixed-effects formula "<y_var> ~ subgrp", shared by the four models below
  fx_form <- as.formula(sprintf("%s ~ %s",y_var,"subgrp"))

  #----------------------------------------------------------------------------
  # Discovery: omnibus test across all subgroups
  df4mod <- subset(df2use, df2use$dataset=="Discovery")
  # mixed-effect model: subgrp as fixed effect, Centre (site) as random intercept
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"All_Disc.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"All_Disc.pval"] <- res["subgrp","p-value"]

  #----------------------------------------------------------------------------
  # Replication: omnibus test across all subgroups
  df4mod <- subset(df2use, df2use$dataset=="Replication")
  # mixed-effect model: subgrp as fixed effect, Centre (site) as random intercept
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"All_Rep.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"All_Rep.pval"] <- res["subgrp","p-value"]

  #----------------------------------------------------------------------------
  # Discovery: SC_equal_RRB vs SC_over_RRB only (TD and RRB_over_SC removed)
  df4mod <- subset(df2use, df2use$dataset=="Discovery" & !is.element(df2use$subgrp,c("TD","RRB_over_SC")))
  # group sizes, counted before rows with a missing y_var are dropped by lme
  n1 <- sum(df4mod$subgrp=="SC_equal_RRB")
  m1 <- sum(df4mod$subgrp=="SC_over_RRB")
  # mixed-effect model: subgrp as fixed effect, Centre (site) as random intercept
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.pval"] <- res["subgrp","p-value"]
  # second row of the coefficient table is the subgrp contrast (row 1 = intercept)
  res <- summary(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.tstat"] <- res$tTable[2,"t-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.es"] <- cohens_d(df4mod[df4mod$subgrp=="SC_equal_RRB",y_var],
                                                                   df4mod[df4mod$subgrp=="SC_over_RRB",y_var])

  #----------------------------------------------------------------------------
  # Replication: SC_equal_RRB vs SC_over_RRB only (TD and RRB_over_SC removed)
  df4mod <- subset(df2use, df2use$dataset=="Replication" & !is.element(df2use$subgrp,c("TD","RRB_over_SC")))
  # group sizes, counted before rows with a missing y_var are dropped by lme
  n2 <- sum(df4mod$subgrp=="SC_equal_RRB")
  m2 <- sum(df4mod$subgrp=="SC_over_RRB")
  # mixed-effect model: subgrp as fixed effect, Centre (site) as random intercept
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.pval"] <- res["subgrp","p-value"]
  # second row of the coefficient table is the subgrp contrast (row 1 = intercept)
  res <- summary(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.tstat"] <- res$tTable[2,"t-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.es"] <- cohens_d(df4mod[df4mod$subgrp=="SC_equal_RRB",y_var],
                                                                  df4mod[df4mod$subgrp=="SC_over_RRB",y_var])

  #----------------------------------------------------------------------------
  # Replication Bayes Factor for the SC_equal_RRB vs SC_over_RRB contrast.
  # tobs is the Discovery t-statistic and trep the Replication t-statistic
  # (the Discovery t-statistic used to be passed for both, so the Bayes factor
  # never looked at the Replication result).
  # NOTE(review): n1/m1 are the Discovery group sizes and n2/m2 the Replication
  # group sizes, and all four are counted before lme drops rows with missing
  # values; confirm this matches what BFSALL expects for n1, n2, m1 and m2.
  res_bf <- BFSALL(tobs = output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.tstat"],
                   trep = output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.tstat"],
                   n1 = n1,
                   n2 = n2,
                   m1 = m1,
                   m2 = m2,
                   sample = 2,
                   Type = 'ALL')
  output_res[y_var,"SCequalRRB_vs_SCoverRRB.repBF"] <- res_bf["Replication BF","Replication 1"]

  #----------------------------------------------------------------------------
  # make a plot: points jittered by subgroup with an unfilled box plot on top,
  # one facet per dataset; RRB_over_SC is not shown
  p <- ggplot(data = df4plot, aes_string(x = "subgrp", y = y_var, colour = "subgrp")) + facet_grid(. ~ dataset)
  p <- p + geom_jitter() + geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA)
  p <- p + ylab(vnames[ivar]) + xlab("Group") +
    scale_x_discrete(labels=c("SC_equal_RRB"="SC=RRB","SC_over_RRB"="SC>RRB","TD"="TD")) +
    scale_colour_manual(values = c(colors2use[2:3],"grey60")) + guides(colour=FALSE) +
    theme(text = element_text(size=fontSize-5),
          axis.text.x = element_text(size=fontSize-5),
          axis.text.y = element_text(size=fontSize-5))
  print(p)
}
# Effect-size tables (Discovery / Replication columns) for two Vineland scores.
# Only the "0.5" row is filled here, from the SC_equal_RRB vs SC_over_RRB effect
# sizes stored in output_res; the other rows stay NA at this point.
# NOTE(review): the row labels "0.5" ... "1" look like subtyping z-thresholds -
# confirm against the code that fills the remaining rows.

# Vineland ABC (vabsabcabc_standard)
vabc <- data.frame(matrix(NA, nrow = 6, ncol = 2))
dimnames(vabc) <- list(c("0.5","0.6","0.7","0.8","0.9","1"), c("Discovery","Replication"))
vabc["0.5", "Discovery"] <- output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc["0.5", "Replication"] <- output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Rep.es"]

# Vineland Daily Living Skills (vabsdscoresd_dss)
vabc_dls <- data.frame(matrix(NA, nrow = 6, ncol = 2))
dimnames(vabc_dls) <- list(c("0.5","0.6","0.7","0.8","0.9","1"), c("Discovery","Replication"))
vabc_dls["0.5", "Discovery"] <- output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc_dls["0.5", "Replication"] <- output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Rep.es"]

# show the full results table
print(output_res)
## All_Disc.fstat All_Disc.pval All_Rep.fstat All_Rep.pval
## age 0.2573919 8.560255e-01 0.99073365 3.977672e-01
## meanFD 3.7568185 1.149997e-02 0.24909122 8.619423e-01
## viq_all 2.0602422 1.061159e-01 1.91036720 1.284730e-01
## piq_all 1.6442358 1.797895e-01 1.59078093 1.921753e-01
## fsiq4_all 2.2680322 8.121156e-02 1.65268759 1.778719e-01
## A_pct_severity 27.8532657 9.335821e-11 36.83073911 2.426948e-13
## B_pct_severity 31.1111186 1.008793e-11 4.96019221 8.429684e-03
## ADI_social_total 1.9132206 1.518344e-01 5.35188550 5.867807e-03
## ADI_communication_total 0.7525201 4.732668e-01 11.76233584 2.058013e-05
## ADI_RRB_total 26.0459642 3.320546e-10 0.54838564 5.792441e-01
## ados_2_SA_CSS 2.6948807 7.150723e-02 1.55852306 2.146072e-01
## ados_2_RRB_CSS 0.7451863 4.767593e-01 0.09256188 9.116568e-01
## SRS_tscore_self 36.6468450 0.000000e+00 32.85687549 1.887379e-15
## RBS_total 18.4726404 2.023459e-10 12.69506727 1.580707e-07
## SSP_total 30.3736907 5.995204e-15 22.94434995 5.732526e-12
## vabsdscoress_dss 22.8844511 2.963851e-12 24.37737030 5.986323e-13
## vabsdscoresd_dss 11.9970970 4.428882e-07 9.41186882 9.619700e-06
## vabsdscoresc_dss 8.9064742 1.810010e-05 7.87815546 6.373651e-05
## vabsabcabc_standard 17.4757273 8.952938e-10 15.73645152 5.898297e-09
## SCequalRRB_vs_SCoverRRB_Disc.fstat
## age 0.023737296
## meanFD 3.030459423
## viq_all 0.029671461
## piq_all 0.063182446
## fsiq4_all 0.009116035
## A_pct_severity 21.687217221
## B_pct_severity 39.468478364
## ADI_social_total 0.149703080
## ADI_communication_total 0.344130518
## ADI_RRB_total 25.836285643
## ados_2_SA_CSS 0.003129000
## ados_2_RRB_CSS 1.700635530
## SRS_tscore_self 1.128509387
## RBS_total 0.071971426
## SSP_total 0.274616524
## vabsdscoress_dss 2.202237362
## vabsdscoresd_dss 0.143787967
## vabsdscoresc_dss 0.003264895
## vabsabcabc_standard 0.200518258
## SCequalRRB_vs_SCoverRRB_Disc.tstat
## age -0.15406913
## meanFD -1.74082148
## viq_all -0.17225406
## piq_all -0.25136119
## fsiq4_all -0.09547793
## A_pct_severity 4.65695364
## B_pct_severity -6.28239432
## ADI_social_total 0.38691482
## ADI_communication_total 0.58662639
## ADI_RRB_total -5.08294065
## ados_2_SA_CSS 0.05593746
## ados_2_RRB_CSS -1.30408417
## SRS_tscore_self 1.06231322
## RBS_total 0.26827491
## SSP_total 0.52403867
## vabsdscoress_dss -1.48399372
## vabsdscoresd_dss -0.37919384
## vabsdscoresc_dss 0.05713926
## vabsabcabc_standard -0.44779265
## SCequalRRB_vs_SCoverRRB_Disc.pval
## age 8.778350e-01
## meanFD 8.448583e-02
## viq_all 8.635572e-01
## piq_all 8.020081e-01
## fsiq4_all 9.241074e-01
## A_pct_severity 8.957158e-06
## B_pct_severity 6.696983e-09
## ADI_social_total 6.995601e-01
## ADI_communication_total 5.586458e-01
## ADI_RRB_total 1.518247e-06
## ados_2_SA_CSS 9.554950e-01
## ados_2_RRB_CSS 1.949775e-01
## SRS_tscore_self 2.934092e-01
## RBS_total 7.890756e-01
## SSP_total 6.019810e-01
## vabsdscoress_dss 1.408063e-01
## vabsdscoresd_dss 7.053170e-01
## vabsdscoresc_dss 9.545429e-01
## vabsabcabc_standard 6.552342e-01
## SCequalRRB_vs_SCoverRRB_Disc.es
## age 0.028844334
## meanFD 0.344384924
## viq_all 0.008856869
## piq_all 0.069781157
## fsiq4_all 0.020006875
## A_pct_severity -0.715775439
## B_pct_severity 1.216289079
## ADI_social_total -0.007511102
## ADI_communication_total 0.054103075
## ADI_RRB_total 1.041784220
## ados_2_SA_CSS 0.014204764
## ados_2_RRB_CSS 0.241170234
## SRS_tscore_self -0.088107447
## RBS_total -0.008937980
## SSP_total -0.127843143
## vabsdscoress_dss 0.214368240
## vabsdscoresd_dss 0.041627724
## vabsdscoresc_dss -0.017163951
## vabsabcabc_standard 0.082515990
## SCequalRRB_vs_SCoverRRB_Rep.fstat
## age 0.24570362
## meanFD 0.99663587
## viq_all 1.72401230
## piq_all 3.58574024
## fsiq4_all 2.47507676
## A_pct_severity 55.22952665
## B_pct_severity 5.50174307
## ADI_social_total 9.47374889
## ADI_communication_total 15.98406931
## ADI_RRB_total 0.55594738
## ados_2_SA_CSS 1.88367323
## ados_2_RRB_CSS 0.24422396
## SRS_tscore_self 0.82958946
## RBS_total 1.15143630
## SSP_total 0.05281376
## vabsdscoress_dss 14.90205172
## vabsdscoresd_dss 7.04076224
## vabsdscoresc_dss 7.64829090
## vabsabcabc_standard 17.25615787
## SCequalRRB_vs_SCoverRRB_Rep.tstat
## age -0.4956850
## meanFD 0.9983165
## viq_all -1.3130165
## piq_all -1.8936051
## fsiq4_all -1.5732377
## A_pct_severity 7.4316571
## B_pct_severity -2.3455795
## ADI_social_total 3.0779456
## ADI_communication_total 3.9980082
## ADI_RRB_total -0.7456188
## ados_2_SA_CSS 1.3724698
## ados_2_RRB_CSS -0.4941902
## SRS_tscore_self 0.9108180
## RBS_total 1.0730500
## SSP_total -0.2298125
## vabsdscoress_dss -3.8603176
## vabsdscoresd_dss -2.6534435
## vabsdscoresc_dss -2.7655544
## vabsabcabc_standard -4.1540532
## SCequalRRB_vs_SCoverRRB_Rep.pval
## age 6.210158e-01
## meanFD 3.201182e-01
## viq_all 1.917468e-01
## piq_all 6.070556e-02
## fsiq4_all 1.183413e-01
## A_pct_severity 1.678557e-11
## B_pct_severity 2.062474e-02
## ADI_social_total 2.578856e-03
## ADI_communication_total 1.102988e-04
## ADI_RRB_total 4.573437e-01
## ados_2_SA_CSS 1.725652e-01
## ados_2_RRB_CSS 6.221064e-01
## SRS_tscore_self 3.662953e-01
## RBS_total 2.857086e-01
## SSP_total 8.188716e-01
## vabsdscoress_dss 1.923479e-04
## vabsdscoresd_dss 9.169900e-03
## vabsdscoresc_dss 6.675582e-03
## vabsabcabc_standard 6.552828e-05
## SCequalRRB_vs_SCoverRRB_Rep.es
## age 0.092631311
## meanFD -0.186560744
## viq_all 0.295813936
## piq_all 0.439664548
## fsiq4_all 0.367904628
## A_pct_severity -1.350293936
## B_pct_severity 0.431758294
## ADI_social_total -0.549950345
## ADI_communication_total -0.711319192
## ADI_RRB_total 0.138401647
## ados_2_SA_CSS -0.253070996
## ados_2_RRB_CSS 0.000223351
## SRS_tscore_self -0.239627574
## RBS_total -0.244260593
## SSP_total 0.172842856
## vabsdscoress_dss 0.724827207
## vabsdscoresd_dss 0.513171882
## vabsdscoresc_dss 0.539616008
## vabsabcabc_standard 0.765845685
## SCequalRRB_vs_SCoverRRB.repBF varNames
## age 6.952307e-01 age
## meanFD 3.075536e+00 meanFD
## viq_all 6.969642e-01 viq_all
## piq_all 7.088276e-01 piq_all
## fsiq4_all 6.897484e-01 fsiq4_all
## A_pct_severity 1.538184e+04 A_pct_severity
## B_pct_severity 1.957605e+07 B_pct_severity
## ADI_social_total 7.401322e-01 ADI_social_total
## ADI_communication_total 8.157931e-01 ADI_communication_total
## ADI_RRB_total 8.768287e+04 ADI_RRB_total
## ados_2_SA_CSS 6.886466e-01 ados_2_SA_CSS
## ados_2_RRB_CSS 1.599530e+00 ados_2_RRB_CSS
## SRS_tscore_self 1.207569e+00 SRS_tscore_self
## RBS_total 7.121920e-01 RBS_total
## SSP_total 7.875919e-01 SSP_total
## vabsdscoress_dss 2.050609e+00 vabsdscoress_dss
## vabsdscoresd_dss 7.381464e-01 vabsdscoresd_dss
## vabsdscoresc_dss 6.876964e-01 vabsdscoresc_dss
## vabsabcabc_standard 7.592313e-01 vabsabcabc_standard
#------------------------------------------------------------------------------
# Subtype the NDAR Discovery and Replication sets from the z-scored difference
# between vars2use[1] and vars2use[2]; z_thresh is the cut-off handed to
# make_subtype()
z_thresh <- 0.6
# vars2use = c("dbaes_atotal","dbaes_btotal")

# Discovery: z-score against the Discovery set's own mean and sd
ds_disc <- Dverbal_Discovery[[vars2use[1]]] - Dverbal_Discovery[[vars2use[2]]]
mean2use <- mean(ds_disc)
sd2use <- sd(ds_disc)
Dverbal_Discovery <- make_subtype(
  data2use = Dverbal_Discovery,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)

# Replication: z-score against the Replication set's own mean and sd
ds_rep <- Dverbal_Replication[[vars2use[1]]] - Dverbal_Replication[[vars2use[2]]]
mean2use <- mean(ds_rep)
sd2use <- sd(ds_rep)
Dverbal_Replication <- make_subtype(
  data2use = Dverbal_Replication,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#------------------------------------------------------------------------------
# Subtype-by-sex contingency table and chi-squared test of independence
# NDAR Discovery
table(Dverbal_Discovery[["z_ds_group"]], Dverbal_Discovery[["sex"]])
##
## F M
## RRB_over_SC 51 197
## SC_equal_RRB 95 310
## SC_over_RRB 51 185
# the `$` form is kept here so the printed "data:" line of the test is unchanged
cs_res <- chisq.test(x = Dverbal_Discovery$z_ds_group, y = Dverbal_Discovery$sex)
print(cs_res)
##
## Pearson's Chi-squared test
##
## data: Dverbal_Discovery$z_ds_group and Dverbal_Discovery$sex
## X-squared = 0.80219, df = 2, p-value = 0.6696
# Same table and test for NDAR Replication
table(Dverbal_Replication[["z_ds_group"]], Dverbal_Replication[["sex"]])
##
## F M
## RRB_over_SC 48 188
## SC_equal_RRB 102 330
## SC_over_RRB 46 176
# the `$` form is kept here so the printed "data:" line of the test is unchanged
cs_res <- chisq.test(x = Dverbal_Replication$z_ds_group, y = Dverbal_Replication$sex)
print(cs_res)
##
## Pearson's Chi-squared test
##
## data: Dverbal_Replication$z_ds_group and Dverbal_Replication$sex
## X-squared = 1.2434, df = 2, p-value = 0.537
#------------------------------------------------------------------------------
# Per-subtype descriptive statistics (psych::describeBy), NDAR Discovery
# keep only the identifier, demographic, ADOS, IQ and SC/RRB columns
df2use <- Dverbal_Discovery[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# age = interview_age / 12 (presumably months to years - confirm units)
df2use[["age"]] <- df2use[["interview_age"]] / 12
cols2use <- c(
  "z_ds_group", "sex", "age", "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
print(res)
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 248 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 248 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 248 10.02 4.19 9.50 9.67 3.46 2.00 27.17 25.17
## ados_age 4 34 98.76 43.79 89.00 97.07 50.41 27.00 202.00 175.00
## ados_sa_css 5 34 6.53 2.38 7.00 6.61 2.97 2.00 10.00 8.00
## ados_rrb_css 6 34 7.65 1.87 8.00 7.86 1.48 1.00 10.00 9.00
## iq 7 66 102.45 17.49 105.00 103.67 15.57 53.00 139.00 86.00
## dbaes_atotal 8 248 0.22 0.11 0.22 0.22 0.12 0.00 0.51 0.51
## dbaes_btotal 9 248 0.46 0.13 0.45 0.46 0.13 0.14 0.79 0.65
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 0.93 1.26 0.27
## ados_age 0.40 -0.79 7.51
## ados_sa_css -0.29 -1.12 0.41
## ados_rrb_css -1.41 2.70 0.32
## iq -0.68 0.52 2.15
## dbaes_atotal 0.11 -0.62 0.01
## dbaes_btotal 0.08 -0.36 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 405 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 405 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 405 9.17 5.79 8.08 8.34 4.69 0 45.75 45.75 2.05
## ados_age 4 69 77.71 42.10 61.00 72.93 34.10 33 182.00 149.00 0.83
## ados_sa_css 5 69 6.84 2.04 7.00 6.95 1.48 1 10.00 9.00 -0.41
## ados_rrb_css 6 69 7.83 2.23 8.00 8.18 1.48 1 10.00 9.00 -1.59
## iq 7 101 104.53 18.27 106.00 105.68 19.27 42 138.00 96.00 -0.75
## dbaes_atotal 8 405 0.30 0.14 0.30 0.30 0.14 0 0.67 0.67 -0.02
## dbaes_btotal 9 405 0.31 0.13 0.31 0.32 0.14 0 0.68 0.68 -0.14
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 6.77 0.29
## ados_age -0.48 5.07
## ados_sa_css -0.01 0.25
## ados_rrb_css 2.50 0.27
## iq 1.03 1.82
## dbaes_atotal -0.37 0.01
## dbaes_btotal -0.21 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 236 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 236 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 236 7.31 4.96 5.83 6.46 2.97 1.67 37.33 35.67
## ados_age 4 50 69.88 36.57 64.50 63.25 27.43 30.00 172.00 142.00
## ados_sa_css 5 50 7.36 1.65 7.00 7.40 1.48 4.00 10.00 6.00
## ados_rrb_css 6 50 7.86 2.19 8.00 8.22 1.48 1.00 10.00 9.00
## iq 7 32 105.00 18.45 111.00 106.15 13.34 40.00 140.00 100.00
## dbaes_atotal 8 236 0.46 0.14 0.46 0.46 0.14 0.11 0.87 0.76
## dbaes_btotal 9 236 0.23 0.11 0.22 0.22 0.10 0.00 0.57 0.57
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.36 7.77 0.32
## ados_age 1.42 1.38 5.17
## ados_sa_css -0.14 -0.71 0.23
## ados_rrb_css -1.69 3.15 0.31
## iq -1.17 2.72 3.26
## dbaes_atotal 0.17 -0.19 0.01
## dbaes_btotal 0.28 -0.07 0.01
# Per-subtype descriptive statistics (psych::describeBy), NDAR Replication
# keep only the identifier, demographic, ADOS, IQ and SC/RRB columns
df2use <- Dverbal_Replication[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# age = interview_age / 12 (presumably months to years - confirm units)
df2use[["age"]] <- df2use[["interview_age"]] / 12
cols2use <- c(
  "z_ds_group", "sex", "age", "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
print(res)
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 236 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 236 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 236 9.83 4.75 9.29 9.34 4.26 3.00 28.58 25.58
## ados_age 4 24 88.00 50.54 76.50 82.75 56.34 36.00 196.00 160.00
## ados_sa_css 5 24 6.50 1.91 7.00 6.55 1.48 3.00 10.00 7.00
## ados_rrb_css 6 24 7.67 1.52 7.50 7.70 1.48 5.00 10.00 5.00
## iq 7 71 102.72 18.36 104.00 103.26 17.79 57.00 152.00 95.00
## dbaes_atotal 8 236 0.23 0.11 0.23 0.22 0.10 0.01 0.61 0.60
## dbaes_btotal 9 236 0.48 0.13 0.47 0.47 0.11 0.21 0.93 0.72
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 1.21 2.10 0.31
## ados_age 0.69 -0.76 10.32
## ados_sa_css -0.16 -0.89 0.39
## ados_rrb_css -0.16 -0.94 0.31
## iq -0.21 0.40 2.18
## dbaes_atotal 0.62 0.73 0.01
## dbaes_btotal 0.53 0.74 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 432 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 432 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 432 8.79 5.20 7.83 8.05 4.45 0 33.83 33.83 1.71
## ados_age 4 82 80.21 38.11 71.50 76.18 41.51 35 188.00 153.00 0.74
## ados_sa_css 5 82 6.90 2.15 7.00 6.97 2.97 2 10.00 8.00 -0.12
## ados_rrb_css 6 82 7.21 2.56 8.00 7.62 1.48 1 10.00 9.00 -1.22
## iq 7 80 108.08 15.91 108.00 107.80 13.34 64 146.00 82.00 0.06
## dbaes_atotal 8 432 0.31 0.14 0.30 0.31 0.14 0 0.74 0.74 -0.02
## dbaes_btotal 9 432 0.32 0.14 0.32 0.32 0.14 0 0.81 0.81 0.07
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 4.13 0.25
## ados_age -0.31 4.21
## ados_sa_css -0.93 0.24
## ados_rrb_css 0.72 0.28
## iq 0.19 1.78
## dbaes_atotal -0.27 0.01
## dbaes_btotal 0.05 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 222 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 222 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 222 8.10 6.17 6.38 6.88 3.77 0.00 40.92 40.92
## ados_age 4 47 71.87 28.18 69.00 68.85 25.20 30.00 141.00 111.00
## ados_sa_css 5 47 7.17 1.75 7.00 7.18 1.48 3.00 10.00 7.00
## ados_rrb_css 6 47 7.49 2.14 8.00 7.69 1.48 1.00 10.00 9.00
## iq 7 35 110.37 18.04 111.00 111.07 17.79 62.00 146.00 84.00
## dbaes_atotal 8 222 0.48 0.13 0.48 0.47 0.13 0.14 0.96 0.82
## dbaes_btotal 9 222 0.24 0.12 0.24 0.24 0.12 0.00 0.66 0.66
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.16 5.47 0.41
## ados_age 0.94 0.23 4.11
## ados_sa_css 0.06 -0.76 0.26
## ados_rrb_css -1.07 1.30 0.31
## iq -0.47 -0.20 3.05
## dbaes_atotal 0.39 0.20 0.01
## dbaes_btotal 0.23 0.03 0.01
#------------------------------------------------------------------------------
# One-way ANOVA: does interview_age differ between the z_ds_group subtypes?
# NDAR Discovery
mod2use <- lm(formula = interview_age ~ z_ds_group, data = Dverbal_Discovery)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 134446 67223 17.478 3.591e-08 ***
## Residuals 886 3407620 3846
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NDAR Replication
mod2use <- lm(formula = interview_age ~ z_ds_group, data = Dverbal_Replication)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 50929 25464.7 6.1841 0.002152 **
## Residuals 887 3652435 4117.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA: does SC (dbaes_atotal) differ between the z_ds_group subtypes?
# NDAR Discovery
mod2use <- lm(formula = dbaes_atotal ~ z_ds_group, data = Dverbal_Discovery)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.017 3.5085 206.5 < 2.2e-16 ***
## Residuals 886 15.054 0.0170
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NDAR Replication
mod2use <- lm(formula = dbaes_atotal ~ z_ds_group, data = Dverbal_Replication)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.426 3.713 218.26 < 2.2e-16 ***
## Residuals 887 15.089 0.017
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA: does RRB (dbaes_btotal) differ between the z_ds_group subtypes?
# NDAR Discovery
mod2use <- lm(formula = dbaes_btotal ~ z_ds_group, data = Dverbal_Discovery)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.598 3.2990 212.94 < 2.2e-16 ***
## Residuals 886 13.727 0.0155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NDAR Replication
mod2use <- lm(formula = dbaes_btotal ~ z_ds_group, data = Dverbal_Replication)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.8743 3.4371 197.27 < 2.2e-16 ***
## Residuals 887 15.4543 0.0174
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA: does ADOS SA CSS (ados_sa_css) differ between the subtypes?
# NDAR Discovery
mod2use <- lm(formula = ados_sa_css ~ z_ds_group, data = Dverbal_Discovery)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 15.23 7.6168 1.894 0.1541
## Residuals 150 603.24 4.0216
# NDAR Replication
mod2use <- lm(formula = ados_sa_css ~ z_ds_group, data = Dverbal_Replication)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.20 3.6005 0.9003 0.4086
## Residuals 150 599.86 3.9991
#------------------------------------------------------------------------------
# One-way ANOVA: does ADOS RRB CSS (ados_rrb_css) differ between the subtypes?
# NDAR Discovery
mod2use <- lm(formula = ados_rrb_css ~ z_ds_group, data = Dverbal_Discovery)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1.02 0.5106 0.1114 0.8947
## Residuals 150 687.70 4.5847
# NDAR Replication
mod2use <- lm(formula = ados_rrb_css ~ z_ds_group, data = Dverbal_Replication)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 4.95 2.4748 0.4684 0.6269
## Residuals 150 792.55 5.2837
#------------------------------------------------------------------------------
# One-way ANOVA: does IQ (iq) differ between the z_ds_group subtypes?
# NDAR Discovery
mod2use <- lm(formula = iq ~ z_ds_group, data = Dverbal_Discovery)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 217 108.61 0.3336 0.7167
## Residuals 196 63811 325.57
# NDAR Replication
mod2use <- lm(formula = iq ~ z_ds_group, data = Dverbal_Replication)
print(anova(mod2use))
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1738 869.07 2.9092 0.05705 .
## Residuals 183 54668 298.73
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Scatterplots of SC (x) against RRB (y), points coloured by z_ds_group subtype
maxScores <- c(3,4)

# NDAR Discovery: both axes fixed to [0, 1]
p_disc <- ggplot(
  data = Dverbal_Discovery,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)
) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0,1) +
  xlim(0,1) +
  ggtitle("NDAR Discovery")
# legend-free copy is the one written to disk; file name carries z_thresh
p1_top_left <- p_disc + guides(colour=FALSE)
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Disc_scatterplot_z%s.pdf",as.character(z_thresh))),
  plot = p1_top_left
)
# version with the legend is shown inline
p_disc
# number of subjects per subtype
table(Dverbal_Discovery[["z_ds_group"]])
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 248 405 236
# Pearson correlation between SC and RRB (`$` form kept for the printed "data:" line)
cor_res <- cor.test(x = Dverbal_Discovery$dbaes_atotal, y = Dverbal_Discovery$dbaes_btotal)
print(cor_res)
##
## Pearson's product-moment correlation
##
## data: Dverbal_Discovery$dbaes_atotal and Dverbal_Discovery$dbaes_btotal
## t = 6.6829, df = 887, p-value = 4.134e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1554332 0.2806578
## sample estimates:
## cor
## 0.2189469
# NDAR Replication: SC (x) against RRB (y), coloured by subtype, axes fixed to [0, 1]
p_rep <- ggplot(
  data = Dverbal_Replication,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)
) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0,1) +
  xlim(0,1) +
  ggtitle("NDAR Replication")
# legend-free copy is the one written to disk; file name carries z_thresh
p2_bottom_left <- p_rep + guides(colour=FALSE)
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Rep_scatterplot_z%s.pdf",as.character(z_thresh))),
  plot = p2_bottom_left
)
# version with the legend is shown inline
p_rep
# number of subjects per subtype
table(Dverbal_Replication[["z_ds_group"]])
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 236 432 222
# Pearson correlation between SC and RRB (`$` form kept for the printed "data:" line)
cor_res <- cor.test(x = Dverbal_Replication$dbaes_atotal, y = Dverbal_Replication$dbaes_btotal)
print(cor_res)
##
## Pearson's product-moment correlation
##
## data: Dverbal_Replication$dbaes_atotal and Dverbal_Replication$dbaes_btotal
## t = 7.1459, df = 888, p-value = 1.864e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1700824 0.2943934
## sample estimates:
## cor
## 0.2331903
#------------------------------------------------------------------------------
# Run supervised model with Discovery as training and Replication as Test
# run validation
# "Actual" Replication labels: Replication z-scored with its OWN mean and sd.
# "Predicted" Replication labels: Replication z-scored with the Discovery
# (training) mean and sd. Accuracy is the agreement between the two.
train_data <- Dverbal_Discovery
test_data <- Dverbal_Replication

# actual test labels, using norms estimated on the test set itself
mean2use <- mean(test_data[, vars2use[1]] - test_data[, vars2use[2]])
sd2use <- sd(test_data[, vars2use[1]] - test_data[, vars2use[2]])
tmp_test <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#==============================================================================
#==============================================================================
# training norms, computed once; mean2use / sd2use are left holding the
# training values at the end of this section
mean2use <- mean(train_data[, vars2use[1]] - train_data[, vars2use[2]])
sd2use <- sd(train_data[, vars2use[1]] - train_data[, vars2use[2]])
train_mean <- mean2use
train_sd <- sd2use
tmp_train <- make_subtype(
  data2use = train_data,
  z_thresh = z_thresh,
  mean2use = train_mean,
  sd2use = train_sd
)
# make labels based on train mean and sd
pred_labels <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = train_mean,
  sd2use = train_sd
)
# Pin both factors to the same three levels so the table is always 3 x 3 and
# lines up with the hard-coded heatmap labels, even if a subtype is empty.
subtype_levels <- c("RRB_over_SC", "SC_equal_RRB", "SC_over_RRB")
confmat <- table(
  factor(tmp_test$z_ds_group, levels = subtype_levels),
  factor(pred_labels$z_ds_group, levels = subtype_levels)
)
# rows = actual labels, columns = predicted labels; diagonal = agreements
acc <- sum(diag(confmat)) / nrow(pred_labels)
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#==============================================================================
#==============================================================================
# plot confusion matrix
# The hook pushes a viewport covering the top-right 90% of the page before
# pheatmap draws, leaving a margin on the left and bottom for the axis titles.
setHook(
  "grid.newpage",
  function() {
    pushViewport(viewport(
      x = 1, y = 1, width = 0.9, height = 0.9,
      name = "vp", just = c("right", "top")
    ))
  },
  action = "prepend"
)
subtype_labels <- c("RRB>SC", "SC=RRB", "SC>RRB")
pheatmap(
  confmat / rowSums(confmat) * 100, # row-wise percentages (per actual subtype)
  display_numbers = confmat,
  color = colorRampPalette(c('white', 'red'))(100),
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize_number = fontSize,
  fontsize_row = fontSize,
  fontsize_col = fontSize,
  labels_row = subtype_labels,
  labels_col = subtype_labels,
  angle_col = 90,
  # pheatmap expects one more break than colours (100 colours -> 101 breaks)
  breaks = seq(0, 100, length.out = 101)
)
setHook("grid.newpage", NULL, "replace")
# Columns of confmat are the predicted labels and rows the actual labels, so
# the title under the columns is "Predicted" and the one beside the rows
# "Actual" (the two titles were previously swapped).
grid::grid.text("Predicted Labels", y = -0.07, gp = gpar(fontsize = fontSize))
grid::grid.text("Actual Labels", x = -0.07, rot = 90, gp = gpar(fontsize = fontSize))
# show accuracy
true_accuracy <- acc
true_accuracy
## [1] 0.9820225
#================================================================================
#================================================================================
# #------------------------------------------------------------------------------
# # Permute subtype labels to examine how well supervised model performs
#
# # nperm = 10000
#
# # make subtypes using z-scores computed from the mean and sd of the training set
# train_data = Dverbal_Discovery
# test_data = Dverbal_Replication
# mean2use = mean(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# sd2use = sd(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# tmp_train = make_subtype(data2use = train_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# mean2use = mean(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# sd2use = sd(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# tmp_test = make_subtype(data2use = test_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# acc = vector(length = nperm)
# for (iperm in 1:nperm){
# # set seed for reproducibility
# set.seed(iperm)
#
# sc_perm = sample(train_data[,vars2use[1]])
# rrb_perm = sample(train_data[,vars2use[2]])
# perm_mean2use = mean(sc_perm - rrb_perm)
# perm_sd2use = sd(sc_perm - rrb_perm)
# # perm_mean2use = mean(train_data[,vars2use[1]] - rrb_perm)
# # perm_sd2use = sd(train_data[,vars2use[1]] - rrb_perm)
# pred_labels = make_subtype(data2use = tmp_test,
# z_thresh = z_thresh,
# mean2use = perm_mean2use,
# sd2use = perm_sd2use)
# confmat = table(tmp_test$z_ds_group,pred_labels$z_ds_group)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/dim(pred_labels)[1]
#
# # compute model
# permuted_labels = sample(tmp_train$z_ds_group)
# mod2use = svm(x = tmp_train[,vars2use], y = permuted_labels)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc[iperm] = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
# #============================================================================
# } # for (iperm in 1:nperm)
#
# df2plot = data.frame(Accuracy = acc)
# p = ggplot(data = df2plot, aes(x = Accuracy)) + geom_histogram() + geom_vline(xintercept=true_accuracy)
# p
#
# # compute p-value
# pval = sum(c(true_accuracy,acc)>=true_accuracy)/(nperm+1)
# pval
#================================================================================
#================================================================================
#------------------------------------------------------------------------------
# Plot difference score Z subtypes in Discovery set
maxScores <- c(3, 4) # NOTE(review): not referenced in the visible code of this section
# Discovery - make plot with all individuals shown as lines: one line per
# subject joining their SC and RRB values, faceted and coloured by subtype.
# NOTE(review): assumes adi_total_vars2use includes dbaes_atotal and
# dbaes_btotal, and that tmp_train rows are in the same order as
# Dverbal_Discovery -- confirm.
df2use <- Dverbal_Discovery[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(tmp_train$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject x subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)
# guides(color = FALSE) is deprecated in current ggplot2; "none" drops the
# legend, and it only needs to be applied once.
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p3_middle_top <- p
ggsave(
  filename = file.path(
    plotpath,
    sprintf("final_NDAR_Disc_jitterplot_z%s.pdf", as.character(z_thresh))
  ),
  plot = p3_middle_top
)
p
# Plot difference score Z subtypes in Replication set
maxScores <- c(3, 4) # NOTE(review): not referenced in the visible code of this section
# Replication - make plot with all individuals shown as lines: one line per
# subject joining their SC and RRB values, faceted and coloured by subtype.
# NOTE(review): assumes adi_total_vars2use includes dbaes_atotal and
# dbaes_btotal, and that tmp_test rows are in the same order as
# Dverbal_Replication -- confirm.
df2use <- Dverbal_Replication[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(tmp_test$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject x subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)
# guides(color = FALSE) is deprecated in current ggplot2; "none" drops the
# legend, and it only needs to be applied once.
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p4_middle_bottom <- p
ggsave(
  filename = file.path(
    plotpath,
    sprintf("final_NDAR_Rep_jitterplot_z%s.pdf", as.character(z_thresh))
  ),
  plot = p4_middle_bottom
)
p
#------------------------------------------------------------------------------
# Apply NDAR subtypes to EU-AIMS LEAP data
# Build EU-AIMS SC and RRB scores as the mean of the A1-A3 and B1-B4
# percent-severity items, stored under the same column names as the NDAR
# scores (vars2use) so both datasets can go through make_subtype().
Dverbal <- read.csv(file.path(datapath, "tidy_verbal.csv"))
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

sc_items <- c("A1_pct_severity", "A2_pct_severity", "A3_pct_severity")
rrb_items <- c(
  "B1_pct_severity", "B2_pct_severity", "B3_pct_severity", "B4_pct_severity"
)

# keep ASD participants with all seven item scores present
mask1 <- euaims_data$Diagnosis == "ASD"
mask2 <- !complete.cases(euaims_data[, c(sc_items, rrb_items)])
euaims_data <- subset(euaims_data, (mask1 & !mask2))

# The NDAR columns are used exactly as read from file (the earlier
# self-assignments of Dverbal[, vars2use] were no-ops and have been dropped).
# NOTE(review): presumably tidy_verbal.csv already stores them on the same
# 0-1 scale as the EU-AIMS percent-severity items -- confirm.
euaims_data[, vars2use[1]] <- (euaims_data$A1_pct_severity +
  euaims_data$A2_pct_severity +
  euaims_data$A3_pct_severity) / 3
euaims_data[, vars2use[2]] <- (euaims_data$B1_pct_severity +
  euaims_data$B2_pct_severity +
  euaims_data$B3_pct_severity +
  euaims_data$B4_pct_severity) / 4

train_data <- Dverbal
test_data <- euaims_data
# norms of the SC - RRB difference score in the full NDAR set, ignoring NAs
mean2use <- mean(train_data[, vars2use[1]] - train_data[, vars2use[2]], na.rm = TRUE)
sd2use <- sd(train_data[, vars2use[1]] - train_data[, vars2use[2]], na.rm = TRUE)
c(mean2use, sd2use)
## [1] -0.01045243 0.19482749
# Label both datasets with the same mean2use / sd2use, so EU-AIMS subjects are
# subtyped against the norms computed from train_data.
# NOTE(review): make_subtype() starts every row as "SC_equal_RRB" and only
# relabels rows whose z-score passes a threshold, so a row with an NA
# difference score would stay "SC_equal_RRB" -- confirm that is intended.
tmp_train <- make_subtype(train_data, z_thresh, mean2use = mean2use, sd2use = sd2use)
tmp_test <- make_subtype(test_data, z_thresh, mean2use = mean2use, sd2use = sd2use)
#===========================================================================
#===========================================================================
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#
# tmp_test$svm_pred_labels = pred_labels
#
# # plot confusion matrix
# setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
# pheatmap(confmat/rowSums(confmat)*100, display_numbers = confmat, color = colorRampPalette(c('white','red'))(100), cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = fontSize, fontsize_row = fontSize, fontsize_col = fontSize,labels_row = c("RRB>SC","SC=RRB","SC>RRB"),labels_col = c("RRB>SC","SC=RRB","SC>RRB"),angle_col=90,
# breaks= seq(0,100, length=100))
# setHook("grid.newpage", NULL, "replace")
# grid::grid.text("Actual Labels", y=-0.07, gp=gpar(fontsize=fontSize))
# grid::grid.text("Predicted Labels", x=-0.07, rot=90, gp=gpar(fontsize=fontSize))
#===========================================================================
#===========================================================================
# scatterplots of SC against RRB coloured by subtype, for NDAR and EU-AIMS.
# xlim()/ylim() remove points that fall outside the limits. The previous
# 0-0.8 range cut off real data (EU-AIMS SC scores reach 0.82 in the
# descriptive statistics printed later in this file), so the limits are 0-1,
# matching the Discovery/Replication scatterplots earlier in the file.
p1 <- ggplot(
  data = tmp_train,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR ALL")
p1
p2 <- ggplot(
  data = tmp_test,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("EU-AIMS with Groups from NDAR ALL")
p2
#===========================================================================
#===========================================================================
# p3 = ggplot(data = tmp_test, aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(pred_labels))) + geom_point() + xlab("SC") + ylab("RRB") + ylim(0,0.8) + xlim(0,0.8) + ggtitle("EU-AIMS")
# p5_bottom_right = p3 + guides(colour=FALSE)
# ggsave(filename = file.path(plotpath, sprintf("final_EUAIMS_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p5_bottom_right)
# p3
#===========================================================================
#===========================================================================
# write out EU-AIMS LEAP data with subgroups defined by NDAR All
# (write.csv defaults are kept, so row names are written as the first column)
write.csv(
  tmp_test,
  file.path(
    datapath,
    sprintf("tidy_euaims_NDAR_subtypes_diffscore_z%s.csv", as.character(z_thresh))
  )
)
#===========================================================================
#===========================================================================
# # Make final plot
# p_final = p1_top_left + p3_middle_top + plot_spacer() + p2_bottom_left + p4_middle_bottom + p5_bottom_right + plot_layout(nrow=3, ncol=3, widths = c(4,4,4), heights = c(8,8,8))
# ggsave(filename = file.path(plotpath, sprintf("final_NDAR_EUAIMS_subtypes_plot_z%s.pdf",as.character(z_thresh))), plot = p_final)
# p_final
#===========================================================================
#===========================================================================
#------------------------------------------------------------------------------
# Integrate subtypes with rest of EU-AIMS LEAP data
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

# TD rows: keep columns 2:6 and add NA severity scores plus a "TD" subgroup
# label so the frame can be stacked with the ASD rows below.
# NOTE(review): columns 2:6 are presumably subid, age, Centre, Schedule and
# Diagnosis (rbind() with asd_df matches columns by name) -- confirm.
td_df <- subset(euaims_data, euaims_data$Diagnosis == "TD", select = 2:6)
td_df$A_pct_severity <- NA_real_
td_df$B_pct_severity <- NA_real_
td_df$subgrp <- "TD"
#===========================================================================
#===========================================================================
# cols2use = c("subid","age","Centre","Schedule","Diagnosis","dbaes_atotal","dbaes_btotal","svm_pred_labels")
cols2use <- c("subid", "age", "Centre", "Schedule", "Diagnosis", "dbaes_atotal", "dbaes_btotal", "z_ds_group")
#===========================================================================
#===========================================================================
# ASD rows: rename the score and subtype columns by name rather than by
# position so the renaming cannot drift if cols2use is reordered.
tmp_asd <- tmp_test[, cols2use]
old_names <- c("dbaes_atotal", "dbaes_btotal", "z_ds_group")
new_names <- c("A_pct_severity", "B_pct_severity", "subgrp")
colnames(tmp_asd)[match(old_names, colnames(tmp_asd))] <- new_names
asd_df <- tmp_asd

# rs-fMRI preprocessing sheet; only rows whose notes column is "ok" are kept.
# NOTE(review): hard-coded absolute path to one user's machine -- this will
# fail elsewhere; consider moving the file under datapath.
fname <- "/Users/mlombardo/Dropbox/euaims/data/rsfmri_preproc/euaims_preproc.xlsx"
pp_data <- read_excel(fname)
mask <- pp_data$notes == "ok"
pp_data <- subset(pp_data, mask)

# Restrict both groups to subjects present in the preprocessing sheet (adding
# sex), then stack them. An earlier rbind() of the unmerged frames was
# overwritten here before being used, so it has been removed.
asd_df <- merge(pp_data[, c("subid", "sex")], asd_df, by = "subid")
td_df <- merge(pp_data[, c("subid", "sex")], td_df, by = "subid")
all_data <- rbind(td_df, asd_df)

# age and sex exist on both sides of this merge and so come back suffixed
# (.x from pp_data, .y from all_data); pick one of each as the plain column.
data2write <- merge(pp_data, all_data, by = "subid")
data2write$age <- data2write$age.x
data2write$sex <- data2write$sex.y
print(table(data2write$Schedule, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## A 8 32 40 78
## B 6 35 44 83
## C 4 26 33 59
## D 1 7 30 23
# subgroup counts per acquisition centre
print(table(data2write[["Centre"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## CAMBRIDGE 4 24 14 29
## KINGS_COLLEGE 10 39 57 78
## MANNHEIM 0 0 0 34
## NIJMEGEN 5 31 57 64
## UTRECHT 0 6 19 38
# subgroup counts per sex
print(table(data2write[["sex"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Female 6 25 40 88
## Male 13 75 107 155
# DSM-5 - find best split that balances participants across sites
# (only the single seed 172342 is evaluated here)
a <- findBestSplit(asd_df, seed_range = 172342) # ,300001:500000))
## [1] 172342
# seeds whose discrepancy equals the smallest non-missing discrepancy
# (which() drops NA comparisons, so seeds with NA discrepancy are excluded)
best_seeds <- a$seed[which(a$discrepancy == min(a$discrepancy, na.rm = TRUE))]
print(best_seeds)
## [1] 172342
# Split datasets -------------------------------------------------------------
rngSeed <- best_seeds[1]

# Split each Schedule (A, B, C, D) separately, in that order and with the same
# seed. From the list returned by SplitDatasetsBySex(), element 2 is used as
# the Discovery half and element 1 as the Replication half.
disc_parts <- list()
rep_parts <- list()
for (sched in c("A", "B", "C", "D")) {
  dset2use <- subset(asd_df, asd_df$Schedule == sched)
  tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
  disc_parts[[sched]] <- tmp_d[[2]]
  rep_parts[[sched]] <- tmp_d[[1]]
}

# per-Schedule halves under their individual names
A_Discovery <- disc_parts[["A"]]
A_Replication <- rep_parts[["A"]]
B_Discovery <- disc_parts[["B"]]
B_Replication <- rep_parts[["B"]]
C_Discovery <- disc_parts[["C"]]
C_Replication <- rep_parts[["C"]]
D_Discovery <- disc_parts[["D"]]
D_Replication <- rep_parts[["D"]]

# stack the four Schedules back together for each half
df_Disc <- rbind(A_Discovery, B_Discovery, C_Discovery, D_Discovery)
df_Rep <- rbind(A_Replication, B_Replication, C_Replication, D_Replication)

# Schedule x Centre counts in the Discovery half
a <- table(df_Disc$Schedule, df_Disc$Centre)
print(a)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 6 17 10 7
## B 9 16 14 3
## C 6 9 14 2
## D 0 10 10 0
# Schedule x Centre counts in the Replication half
b <- table(df_Rep[["Schedule"]], df_Rep[["Centre"]])
print(b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 7 18 10 5
## B 8 17 13 5
## C 6 10 13 3
## D 0 9 9 0
# cell-wise Discovery minus Replication counts (0 = perfectly balanced cell)
print(a - b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A -1 -1 0 2
## B 1 -1 1 -2
## C 0 -1 1 -1
## D 0 1 1 0
# total absolute imbalance between the two halves, summed over all cells
print(sum(abs(a - b)))
## [1] 14
# Tag each subject with the half it was assigned to; subjects found in neither
# df_Disc nor df_Rep keep NA.
data2write$dataset <- NA
mask <- data2write$subid %in% df_Disc$subid
data2write[mask, "dataset"] <- "Discovery"
mask <- data2write$subid %in% df_Rep$subid
data2write[mask, "dataset"] <- "Replication"

# ASD rows of each half (which() drops rows where the condition is NA, as
# subset() does)
asd_Disc <- data2write[
  which(data2write$dataset == "Discovery" & data2write$Diagnosis == "ASD"),
]
asd_Rep <- data2write[
  which(data2write$dataset == "Replication" & data2write$Diagnosis == "ASD"),
]
# # find which seed gives best TD age-match -------------------------------------
# seeds = 1:1000
# pvals = data.frame(matrix(nrow = length(seeds), ncol = 2))
# for (i in 1:length(seeds)) {
# res = findTDAgeMatch(data2write, seed_range = c(seeds[i],seeds[i]))
# td_Disc_matched = res[[2]]
# td_Rep_matched = res[[1]]
# tres = t.test(td_Disc_matched$age, asd_Disc$age)
# pvals[i,1] = tres$p.value
# tres = t.test(td_Rep_matched$age, asd_Rep$age)
# pvals[i,2] = tres$p.value
# #print(i)
# }
# a = sort.int(pvals[,1], decreasing = TRUE, index.return = TRUE)
# b=pvals[a$ix,]
# Match TD subjects to the two halves using a single fixed seed. From the
# list returned by findTDAgeMatch(), element 2 is used as the Discovery TD
# set and element 1 as the Replication TD set.
seed2use <- 929
res <- findTDAgeMatch(data2write, seed_range = rep(seed2use, 2))
td_Disc_matched <- res[[2]]
td_Rep_matched <- res[[1]]

# tag the matched TD subjects with their half
mask <- data2write$subid %in% td_Disc_matched$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% td_Rep_matched$subid
data2write$dataset[mask] <- "Replication"

# subgroup counts per half
print(table(data2write[["dataset"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Discovery 13 48 72 121
## Replication 6 52 75 122
# save the combined table at the project root, with the z threshold in the name
fname2save <- here(
  sprintf(
    "asd_subgrp_data_rsfmri_ALL_DSM5_diffzscoreGrps_z%s.csv",
    as.character(z_thresh)
  )
)
write.csv(data2write, file = fname2save)
#------------------------------------------------------------------------------
# Descriptive stats
# measures summarised per subgroup and dataset
score_cols <- c(
  "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore", "SRS_tscore_self", "RBS_total", "SSP_total",
  "vabsdscoresc_dss", "vabsdscoresd_dss", "vabsdscoress_dss",
  "vabsabcabc_standard"
)
df2use <- data2write[
  ,
  c("subid", "Diagnosis", "dataset", "Centre", "meanFD", "age", "sex", "subgrp", score_cols)
]
# Blank out every cell equal to 999 or 777, in all selected columns.
# NOTE(review): presumably these are missing-data codes -- confirm.
mask <- df2use == 999 | df2use == 777
df2use[mask] <- NA
# NOTE(review): presumably converts age from days to years -- confirm.
df2use$age <- df2use$age / 365
# The grouping columns are included in x because describeBy() is given them
# by name; they show up in the printed result as all-NaN "dataset*" and
# "subgrp*" rows.
cols2use <- c("dataset", "subgrp", "age", "meanFD", score_cols)
res <- describeBy(x = df2use[, cols2use], group = c("subgrp", "dataset"))
res
##
## Descriptive statistics by group
## subgrp: RRB_over_SC
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 13 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 13 NaN NA NA NaN NA Inf -Inf
## age 3 13 17.31 5.96 18.04 17.56 7.26 7.89 24.03
## meanFD 4 13 0.24 0.28 0.18 0.18 0.04 0.06 1.14
## viq_all 5 13 97.69 14.94 96.00 97.55 19.27 73.00 123.85
## piq_all 6 13 99.46 15.95 99.00 100.00 13.34 64.00 129.00
## fsiq4_all 7 13 98.70 14.08 96.00 99.19 11.86 74.00 118.01
## A_pct_severity 8 13 0.19 0.13 0.14 0.18 0.14 0.00 0.41
## B_pct_severity 9 13 0.41 0.14 0.42 0.41 0.15 0.13 0.59
## ADI_social_total 10 13 16.69 8.10 20.00 17.18 7.41 2.00 26.00
## ADI_communication_total 11 13 14.77 6.52 16.00 15.09 5.93 2.00 24.00
## ADI_RRB_total 12 13 7.69 2.25 8.00 7.91 1.48 3.00 10.00
## ados_2_SA_CSS 13 13 4.62 2.96 3.00 4.55 2.97 1.00 9.00
## ados_2_RRB_CSS 14 13 4.38 3.88 1.00 4.18 0.00 1.00 10.00
## SRS_tscore 15 9 74.33 10.52 75.00 74.33 13.34 58.00 90.00
## SRS_tscore_self 16 6 60.17 5.88 62.00 60.17 3.71 49.00 65.00
## RBS_total 17 8 18.75 9.08 19.00 18.75 10.38 7.00 33.00
## SSP_total 18 6 137.50 21.57 132.00 137.50 21.50 116.00 167.00
## vabsdscoresc_dss 19 9 75.22 22.37 77.00 75.22 11.86 29.00 115.00
## vabsdscoresd_dss 20 9 63.78 14.25 65.00 63.78 11.86 31.00 79.00
## vabsdscoress_dss 21 9 64.89 14.06 69.00 64.89 11.86 35.00 82.00
## vabsabcabc_standard 22 9 70.89 8.25 70.00 70.89 5.93 59.00 88.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 16.14 -0.27 -1.67 1.65
## meanFD 1.09 2.59 5.61 0.08
## viq_all 50.85 -0.06 -1.19 4.14
## piq_all 65.00 -0.29 -0.05 4.42
## fsiq4_all 44.01 -0.10 -1.29 3.91
## A_pct_severity 0.41 0.25 -1.40 0.04
## B_pct_severity 0.46 -0.52 -0.97 0.04
## ADI_social_total 24.00 -0.45 -1.40 2.25
## ADI_communication_total 22.00 -0.36 -1.00 1.81
## ADI_RRB_total 7.00 -0.70 -0.64 0.62
## ados_2_SA_CSS 8.00 0.31 -1.64 0.82
## ados_2_RRB_CSS 9.00 0.24 -1.96 1.08
## SRS_tscore 32.00 -0.01 -1.46 3.51
## SRS_tscore_self 16.00 -0.99 -0.72 2.40
## RBS_total 26.00 0.18 -1.44 3.21
## SSP_total 51.00 0.28 -1.97 8.80
## vabsdscoresc_dss 86.00 -0.35 0.15 7.46
## vabsdscoresd_dss 48.00 -1.13 0.32 4.75
## vabsdscoress_dss 47.00 -0.80 -0.38 4.69
## vabsabcabc_standard 29.00 0.64 -0.39 2.75
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 48 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 48 NaN NA NA NaN NA Inf -Inf
## age 3 48 16.25 5.89 14.92 15.91 4.95 7.08 30.28
## meanFD 4 48 0.32 0.56 0.23 0.23 0.15 0.04 3.95
## viq_all 5 47 97.22 18.05 97.35 97.31 19.65 64.00 136.00
## piq_all 6 47 100.08 19.23 102.80 100.65 16.60 61.00 142.00
## fsiq4_all 7 48 98.91 17.44 103.86 99.47 19.27 60.00 131.00
## A_pct_severity 8 48 0.32 0.14 0.32 0.31 0.14 0.03 0.63
## B_pct_severity 9 48 0.32 0.16 0.29 0.32 0.15 0.01 0.69
## ADI_social_total 10 48 17.40 6.89 18.50 17.73 6.67 3.00 27.00
## ADI_communication_total 11 48 14.29 6.17 14.00 14.38 7.41 0.00 26.00
## ADI_RRB_total 12 48 5.54 2.39 5.50 5.60 2.22 0.00 12.00
## ados_2_SA_CSS 13 47 6.17 2.65 6.00 6.23 2.97 1.00 10.00
## ados_2_RRB_CSS 14 47 5.11 2.74 5.00 5.13 2.97 1.00 10.00
## SRS_tscore 15 44 71.30 11.81 72.50 71.47 13.34 47.00 90.00
## SRS_tscore_self 16 26 62.19 11.97 65.00 61.95 12.60 43.00 89.00
## RBS_total 17 43 17.05 13.47 15.00 15.20 11.86 0.00 53.00
## SSP_total 18 27 134.19 29.86 137.00 134.35 25.20 81.00 187.00
## vabsdscoresc_dss 19 47 72.26 18.04 75.00 73.21 13.34 21.00 122.00
## vabsdscoresd_dss 20 46 72.76 14.52 72.00 73.16 10.38 25.00 105.00
## vabsdscoress_dss 21 47 71.96 14.97 74.00 73.49 11.86 20.00 95.00
## vabsabcabc_standard 22 46 70.17 13.26 72.00 71.11 9.64 20.00 100.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.20 0.55 -0.59 0.85
## meanFD 3.91 5.62 33.37 0.08
## viq_all 72.00 -0.11 -0.77 2.63
## piq_all 81.00 -0.31 -0.59 2.81
## fsiq4_all 71.00 -0.33 -0.79 2.52
## A_pct_severity 0.60 0.12 -0.66 0.02
## B_pct_severity 0.68 0.33 -0.35 0.02
## ADI_social_total 24.00 -0.44 -0.92 0.99
## ADI_communication_total 26.00 -0.15 -0.83 0.89
## ADI_RRB_total 12.00 -0.08 -0.03 0.34
## ados_2_SA_CSS 9.00 -0.21 -1.17 0.39
## ados_2_RRB_CSS 9.00 -0.37 -1.03 0.40
## SRS_tscore 43.00 -0.19 -1.10 1.78
## SRS_tscore_self 46.00 0.07 -0.99 2.35
## RBS_total 53.00 1.15 0.74 2.05
## SSP_total 106.00 -0.10 -1.05 5.75
## vabsdscoresc_dss 101.00 -0.40 1.21 2.63
## vabsdscoresd_dss 80.00 -0.53 1.44 2.14
## vabsdscoress_dss 75.00 -1.18 1.97 2.18
## vabsabcabc_standard 80.00 -1.15 3.22 1.96
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 72 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 72 NaN NA NA NaN NA Inf -Inf
## age 3 72 16.26 5.21 15.88 16.08 5.11 7.56 29.40
## meanFD 4 72 0.23 0.22 0.15 0.18 0.10 0.03 1.08
## viq_all 5 71 98.23 19.62 100.00 98.10 20.22 61.00 142.00
## piq_all 6 71 99.28 22.87 102.00 100.05 21.93 52.43 150.00
## fsiq4_all 7 72 98.84 19.86 102.25 99.36 19.60 59.00 143.00
## A_pct_severity 8 72 0.42 0.14 0.44 0.42 0.13 0.16 0.82
## B_pct_severity 9 72 0.17 0.10 0.16 0.17 0.12 0.00 0.46
## ADI_social_total 10 72 17.57 6.48 18.00 17.90 8.90 3.00 28.00
## ADI_communication_total 11 72 14.10 4.98 14.50 14.24 5.19 2.00 24.00
## ADI_RRB_total 12 72 3.15 2.13 3.00 3.02 1.48 0.00 10.00
## ados_2_SA_CSS 13 70 6.41 2.51 7.00 6.61 2.97 1.00 10.00
## ados_2_RRB_CSS 14 70 4.67 2.75 5.00 4.59 2.97 1.00 10.00
## SRS_tscore 15 61 73.07 12.14 74.00 73.71 13.34 44.00 95.00
## SRS_tscore_self 16 28 62.96 12.16 61.00 62.04 9.64 42.00 94.00
## RBS_total 17 59 17.07 17.01 14.00 14.55 16.31 0.00 90.00
## SSP_total 18 46 138.93 30.98 140.00 140.08 37.06 53.00 189.00
## vabsdscoresc_dss 19 67 72.25 16.33 72.00 73.00 10.38 21.00 107.00
## vabsdscoresd_dss 20 67 71.90 17.73 72.00 71.45 13.34 17.00 131.00
## vabsdscoress_dss 21 67 67.70 16.30 69.00 68.80 13.34 20.00 104.00
## vabsabcabc_standard 22 67 68.51 15.50 70.00 69.27 11.86 6.00 103.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.85 0.33 -0.50 0.61
## meanFD 1.05 2.20 4.74 0.03
## viq_all 81.00 0.06 -0.64 2.33
## piq_all 97.57 -0.26 -0.58 2.71
## fsiq4_all 84.00 -0.24 -0.88 2.34
## A_pct_severity 0.66 0.27 -0.27 0.02
## B_pct_severity 0.46 0.47 -0.44 0.01
## ADI_social_total 25.00 -0.34 -0.91 0.76
## ADI_communication_total 22.00 -0.23 -0.60 0.59
## ADI_RRB_total 10.00 0.74 0.31 0.25
## ados_2_SA_CSS 9.00 -0.57 -0.72 0.30
## ados_2_RRB_CSS 9.00 -0.19 -1.15 0.33
## SRS_tscore 51.00 -0.35 -0.40 1.55
## SRS_tscore_self 52.00 1.00 0.54 2.30
## RBS_total 90.00 1.80 4.43 2.21
## SSP_total 136.00 -0.40 -0.39 4.57
## vabsdscoresc_dss 86.00 -0.77 2.04 2.00
## vabsdscoresd_dss 114.00 0.25 1.86 2.17
## vabsdscoress_dss 84.00 -0.72 0.83 1.99
## vabsabcabc_standard 97.00 -1.17 3.91 1.89
## ------------------------------------------------------------
## subgrp: TD
## dataset: Discovery
## vars n mean sd median trimmed mad min
## dataset* 1 121 NaN NA NA NaN NA Inf
## subgrp* 2 121 NaN NA NA NaN NA Inf
## age 3 121 16.83 5.23 16.65 16.73 5.69 7.22
## meanFD 4 121 0.18 0.15 0.13 0.15 0.07 0.03
## viq_all 5 119 104.52 19.70 105.00 105.03 19.27 46.00
## piq_all 6 119 106.10 19.47 107.00 107.53 17.79 49.00
## fsiq4_all 7 119 105.72 18.33 108.18 106.99 16.58 53.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf
## SRS_tscore 15 68 47.84 9.40 45.00 46.32 5.19 37.00
## SRS_tscore_self 16 71 46.69 4.85 46.00 46.26 4.45 39.00
## RBS_total 17 68 2.15 4.74 0.00 0.95 0.00 0.00
## SSP_total 18 59 177.86 12.71 182.00 179.78 8.90 122.00
## vabsdscoresc_dss 19 34 91.97 25.44 99.50 93.50 21.50 21.00
## vabsdscoresd_dss 20 34 90.74 20.28 98.50 92.25 17.05 33.00
## vabsdscoress_dss 21 34 96.21 23.67 102.50 98.57 21.50 33.00
## vabsabcabc_standard 22 34 92.06 23.04 99.50 93.89 15.57 25.00
## max range skew kurtosis se
## dataset* -Inf -Inf NA NA NA
## subgrp* -Inf -Inf NA NA NA
## age 29.84 22.62 0.17 -0.51 0.48
## meanFD 0.85 0.82 2.28 5.65 0.01
## viq_all 160.00 114.00 -0.24 0.38 1.81
## piq_all 147.00 98.00 -0.69 0.32 1.79
## fsiq4_all 142.00 89.00 -0.69 0.58 1.68
## A_pct_severity -Inf -Inf NA NA NA
## B_pct_severity -Inf -Inf NA NA NA
## ADI_social_total -Inf -Inf NA NA NA
## ADI_communication_total -Inf -Inf NA NA NA
## ADI_RRB_total -Inf -Inf NA NA NA
## ados_2_SA_CSS -Inf -Inf NA NA NA
## ados_2_RRB_CSS -Inf -Inf NA NA NA
## SRS_tscore 76.00 39.00 1.54 1.44 1.14
## SRS_tscore_self 63.00 24.00 0.93 1.10 0.58
## RBS_total 27.00 27.00 3.13 10.87 0.57
## SSP_total 190.00 68.00 -1.90 4.85 1.66
## vabsdscoresc_dss 138.00 117.00 -0.71 0.36 4.36
## vabsdscoresd_dss 121.00 88.00 -0.80 0.07 3.48
## vabsdscoress_dss 129.00 96.00 -0.83 -0.31 4.06
## vabsabcabc_standard 127.00 102.00 -0.90 0.30 3.95
## ------------------------------------------------------------
## subgrp: RRB_over_SC
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 6 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 6 NaN NA NA NaN NA Inf -Inf
## age 3 6 14.70 3.15 13.73 14.70 2.60 11.45 19.56
## meanFD 4 6 0.20 0.10 0.20 0.20 0.08 0.10 0.38
## viq_all 5 6 106.24 18.88 100.86 106.24 9.12 91.00 143.00
## piq_all 6 6 108.81 22.32 101.90 108.81 17.62 89.00 148.00
## fsiq4_all 7 6 108.00 20.20 101.86 108.00 6.47 93.00 148.00
## A_pct_severity 8 6 0.15 0.08 0.16 0.15 0.05 0.04 0.27
## B_pct_severity 9 6 0.32 0.10 0.32 0.32 0.12 0.18 0.45
## ADI_social_total 10 6 13.83 5.12 14.50 13.83 4.45 5.00 19.00
## ADI_communication_total 11 6 7.83 3.13 8.00 7.83 3.71 3.00 11.00
## ADI_RRB_total 12 6 4.50 2.07 4.50 4.50 1.48 1.00 7.00
## ados_2_SA_CSS 13 6 4.67 2.73 5.00 4.67 3.71 1.00 8.00
## ados_2_RRB_CSS 14 6 4.33 2.73 5.00 4.33 2.97 1.00 7.00
## SRS_tscore 15 5 66.20 12.26 72.00 66.20 10.38 48.00 79.00
## SRS_tscore_self 16 2 64.50 3.54 64.50 64.50 3.71 62.00 67.00
## RBS_total 17 5 14.60 10.01 13.00 14.60 8.90 5.00 30.00
## SSP_total 18 3 142.67 9.07 139.00 142.67 4.45 136.00 153.00
## vabsdscoresc_dss 19 6 76.50 11.40 74.00 76.50 4.45 68.00 99.00
## vabsdscoresd_dss 20 6 72.50 6.72 71.00 72.50 4.45 66.00 85.00
## vabsdscoress_dss 21 6 81.50 9.52 82.00 81.50 8.15 67.00 95.00
## vabsabcabc_standard 22 6 69.17 15.55 75.00 69.17 6.67 39.00 81.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 8.11 0.41 -1.73 1.29
## meanFD 0.28 0.66 -1.05 0.04
## viq_all 52.00 1.09 -0.51 7.71
## piq_all 59.00 0.72 -1.23 9.11
## fsiq4_all 55.00 1.19 -0.36 8.24
## A_pct_severity 0.23 0.16 -1.25 0.03
## B_pct_severity 0.27 -0.03 -1.75 0.04
## ADI_social_total 14.00 -0.59 -1.26 2.09
## ADI_communication_total 8.00 -0.30 -1.67 1.28
## ADI_RRB_total 6.00 -0.45 -1.25 0.85
## ados_2_SA_CSS 7.00 -0.15 -1.85 1.12
## ados_2_RRB_CSS 6.00 -0.29 -1.96 1.12
## SRS_tscore 31.00 -0.41 -1.76 5.48
## SRS_tscore_self 5.00 0.00 -2.75 2.50
## RBS_total 25.00 0.47 -1.64 4.48
## SSP_total 17.00 0.34 -2.33 5.24
## vabsdscoresc_dss 31.00 1.16 -0.38 4.65
## vabsdscoresd_dss 19.00 0.87 -0.82 2.74
## vabsdscoress_dss 28.00 -0.11 -1.39 3.89
## vabsabcabc_standard 42.00 -1.09 -0.55 6.35
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 52 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 52 NaN NA NA NaN NA Inf -Inf
## age 3 52 16.67 6.17 16.20 16.35 6.78 7.12 30.15
## meanFD 4 52 0.22 0.15 0.18 0.19 0.11 0.06 0.76
## viq_all 5 51 101.96 15.54 104.00 102.52 19.27 70.00 133.00
## piq_all 6 51 104.75 17.53 106.00 105.33 20.76 66.42 134.00
## fsiq4_all 7 51 103.61 16.05 106.00 104.04 17.79 69.00 130.00
## A_pct_severity 8 52 0.26 0.12 0.24 0.25 0.12 0.04 0.65
## B_pct_severity 9 52 0.25 0.13 0.24 0.24 0.12 0.00 0.67
## ADI_social_total 10 52 14.79 6.47 15.50 14.98 6.67 1.00 27.00
## ADI_communication_total 11 52 11.71 5.74 11.00 11.64 5.93 0.00 24.00
## ADI_RRB_total 12 52 4.12 2.41 4.00 4.10 2.97 0.00 9.00
## ados_2_SA_CSS 13 51 5.43 2.52 6.00 5.46 2.97 1.00 10.00
## ados_2_RRB_CSS 14 51 4.92 2.46 5.00 4.93 1.48 1.00 9.00
## SRS_tscore 15 47 66.21 11.50 67.00 66.05 13.34 43.00 90.00
## SRS_tscore_self 16 25 60.76 7.79 61.00 60.76 7.41 46.00 79.00
## RBS_total 17 44 14.09 11.58 11.50 12.83 11.12 0.00 52.00
## SSP_total 18 30 140.70 27.79 143.00 143.42 34.10 69.00 177.00
## vabsdscoresc_dss 19 48 83.06 15.26 81.00 82.60 14.83 50.00 122.00
## vabsdscoresd_dss 20 48 78.96 15.44 77.50 78.22 12.60 38.00 119.00
## vabsdscoress_dss 21 48 77.62 14.63 79.00 78.28 14.83 30.00 101.00
## vabsabcabc_standard 22 48 78.96 12.86 77.50 78.47 9.64 48.00 117.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.03 0.35 -0.87 0.86
## meanFD 0.70 1.61 2.77 0.02
## viq_all 63.00 -0.20 -0.87 2.18
## piq_all 67.58 -0.24 -0.83 2.45
## fsiq4_all 61.00 -0.21 -0.95 2.25
## A_pct_severity 0.61 0.54 0.61 0.02
## B_pct_severity 0.67 0.68 0.69 0.02
## ADI_social_total 26.00 -0.23 -0.84 0.90
## ADI_communication_total 24.00 0.10 -0.71 0.80
## ADI_RRB_total 9.00 0.07 -0.71 0.33
## ados_2_SA_CSS 9.00 -0.11 -1.02 0.35
## ados_2_RRB_CSS 8.00 -0.44 -0.74 0.35
## SRS_tscore 47.00 0.10 -0.87 1.68
## SRS_tscore_self 33.00 0.06 -0.31 1.56
## RBS_total 52.00 1.21 1.67 1.75
## SSP_total 108.00 -0.72 -0.41 5.07
## vabsdscoresc_dss 72.00 0.39 -0.16 2.20
## vabsdscoresd_dss 81.00 0.36 0.46 2.23
## vabsdscoress_dss 71.00 -0.54 0.69 2.11
## vabsabcabc_standard 69.00 0.47 0.39 1.86
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 75 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 75 NaN NA NA NaN NA Inf -Inf
## age 3 75 16.35 5.14 16.07 16.18 5.91 7.48 29.23
## meanFD 4 75 0.27 0.31 0.16 0.20 0.11 0.04 1.60
## viq_all 5 72 96.69 19.94 99.00 97.83 21.68 50.91 130.00
## piq_all 6 74 98.46 21.74 103.50 100.10 21.50 44.03 138.00
## fsiq4_all 7 73 98.22 19.99 103.00 99.19 19.81 59.00 139.00
## A_pct_severity 8 75 0.45 0.15 0.45 0.45 0.18 0.16 0.75
## B_pct_severity 9 75 0.20 0.12 0.19 0.19 0.13 0.00 0.47
## ADI_social_total 10 75 18.08 5.79 19.00 18.28 5.93 4.00 29.00
## ADI_communication_total 11 75 14.64 4.80 15.00 14.82 4.45 3.00 24.00
## ADI_RRB_total 12 75 3.77 2.40 3.00 3.61 1.48 0.00 10.00
## ados_2_SA_CSS 13 71 6.27 2.75 6.00 6.39 2.97 1.00 10.00
## ados_2_RRB_CSS 14 71 4.63 2.78 5.00 4.53 2.97 1.00 10.00
## SRS_tscore 15 66 73.64 11.65 76.00 74.24 12.60 48.00 90.00
## SRS_tscore_self 16 36 63.03 9.84 61.50 62.83 6.67 40.00 84.00
## RBS_total 17 66 17.89 14.88 13.00 16.17 11.12 0.00 73.00
## SSP_total 18 50 138.68 25.20 138.50 139.03 28.17 91.00 184.00
## vabsdscoresc_dss 19 67 73.79 14.70 75.00 74.42 10.38 21.00 110.00
## vabsdscoresd_dss 20 66 71.53 16.54 68.50 71.06 15.57 42.00 118.00
## vabsdscoress_dss 21 67 65.99 16.35 68.00 66.33 14.83 23.00 112.00
## vabsabcabc_standard 22 66 68.62 13.77 69.00 68.96 11.86 28.00 107.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.75 0.32 -0.60 0.59
## meanFD 1.55 2.99 9.05 0.04
## viq_all 79.09 -0.46 -0.64 2.35
## piq_all 93.97 -0.61 -0.51 2.53
## fsiq4_all 80.00 -0.42 -0.82 2.34
## A_pct_severity 0.59 0.05 -1.00 0.02
## B_pct_severity 0.47 0.35 -0.77 0.01
## ADI_social_total 25.00 -0.34 -0.62 0.67
## ADI_communication_total 21.00 -0.32 -0.69 0.55
## ADI_RRB_total 10.00 0.58 -0.41 0.28
## ados_2_SA_CSS 9.00 -0.22 -1.06 0.33
## ados_2_RRB_CSS 9.00 -0.15 -1.23 0.33
## SRS_tscore 42.00 -0.34 -0.95 1.43
## SRS_tscore_self 44.00 0.21 -0.04 1.64
## RBS_total 73.00 1.25 1.44 1.83
## SSP_total 93.00 -0.09 -0.98 3.56
## vabsdscoresc_dss 89.00 -0.76 2.30 1.80
## vabsdscoresd_dss 76.00 0.41 -0.15 2.04
## vabsdscoress_dss 89.00 -0.16 0.40 2.00
## vabsabcabc_standard 79.00 -0.29 1.12 1.69
## ------------------------------------------------------------
## subgrp: TD
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 122 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 122 NaN NA NA NaN NA Inf -Inf
## age 3 122 16.86 6.07 16.34 16.58 7.59 6.89 29.72
## meanFD 4 122 0.23 0.46 0.14 0.15 0.07 0.04 4.60
## viq_all 5 122 104.02 17.58 108.18 105.64 12.13 45.00 140.00
## piq_all 6 122 104.64 18.41 108.96 106.56 14.08 49.00 139.00
## fsiq4_all 7 122 104.94 17.14 108.09 107.29 11.86 50.00 134.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf -Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf -Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf -Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf -Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf -Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf -Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf -Inf
## SRS_tscore 15 65 47.23 9.34 44.00 45.66 4.45 37.00 90.00
## SRS_tscore_self 16 61 48.44 6.84 47.00 47.63 5.93 39.00 69.00
## RBS_total 17 63 3.08 11.54 0.00 0.86 0.00 0.00 89.00
## SSP_total 18 54 174.93 19.38 182.00 178.41 6.67 75.00 190.00
## vabsdscoresc_dss 19 39 92.74 25.45 96.00 95.82 20.76 21.00 125.00
## vabsdscoresd_dss 20 39 91.10 22.65 97.00 93.91 14.83 27.00 122.00
## vabsdscoress_dss 21 39 98.90 27.04 103.00 102.12 17.79 20.00 132.00
## vabsabcabc_standard 22 38 93.00 25.39 100.00 96.44 15.57 20.00 126.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 22.83 0.33 -0.97 0.55
## meanFD 4.56 7.54 64.83 0.04
## viq_all 95.00 -0.99 1.51 1.59
## piq_all 90.00 -0.93 0.67 1.67
## fsiq4_all 84.00 -1.28 1.67 1.55
## A_pct_severity -Inf NA NA NA
## B_pct_severity -Inf NA NA NA
## ADI_social_total -Inf NA NA NA
## ADI_communication_total -Inf NA NA NA
## ADI_RRB_total -Inf NA NA NA
## ados_2_SA_CSS -Inf NA NA NA
## ados_2_RRB_CSS -Inf NA NA NA
## SRS_tscore 53.00 2.14 5.82 1.16
## SRS_tscore_self 30.00 1.05 0.48 0.88
## RBS_total 89.00 6.60 45.89 1.45
## SSP_total 115.00 -2.88 10.92 2.64
## vabsdscoresc_dss 104.00 -1.21 1.00 4.08
## vabsdscoresd_dss 95.00 -1.34 1.26 3.63
## vabsdscoress_dss 112.00 -1.26 1.10 4.33
## vabsabcabc_standard 106.00 -1.42 1.40 4.12
#------------------------------------------------------------------------------
# Cross-tabulation of subtype (rows) by sex (columns)
# Discovery: keep only the rows of data2write whose dataset is "Discovery"
data2use <- subset(data2write, subset = dataset == "Discovery")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 5 8
## SC_equal_RRB 12 36
## SC_over_RRB 18 54
## TD 41 80
# Pearson chi-squared test of association between subtype and sex
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 2.7445, df = 3, p-value = 0.4327
# Replication: keep only the rows of data2write whose dataset is "Replication"
data2use <- subset(data2write, subset = dataset == "Replication")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 1 5
## SC_equal_RRB 13 39
## SC_over_RRB 22 53
## TD 47 75
# Pearson chi-squared test of association between subtype and sex
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.3766, df = 3, p-value = 0.2236
#------------------------------------------------------------------------------
# Group comparisons on each variable in vars2analyze, run separately for the
# Discovery and Replication rows of df2use. For every variable:
#   1. omnibus test of subgrp using all groups present in the dataset
#   2. SC_equal_RRB vs SC_over_RRB only (TD and RRB_over_SC rows removed):
#      F, t, p-value and Cohen's d
#   3. replication Bayes factor for the two-group comparison (BFSALL)
#   4. jitter + boxplot per group, facetted by dataset (RRB_over_SC excluded)
# Every model is an nlme::lme fit with subgrp as the only fixed effect and a
# random intercept per Centre; rows with missing values are dropped
# (na.action = na.omit).
vars2analyze <- c("age","meanFD","viq_all","piq_all","fsiq4_all",
                  "A_pct_severity","B_pct_severity",
                  "ADI_social_total","ADI_communication_total","ADI_RRB_total",
                  "ados_2_SA_CSS","ados_2_RRB_CSS",
                  "SRS_tscore_self","RBS_total","SSP_total",
                  "vabsdscoress_dss","vabsdscoresd_dss","vabsdscoresc_dss","vabsabcabc_standard")
# y-axis labels for the plots, matched to vars2analyze by position
# NOTE(review): A_pct_severity / B_pct_severity are labelled "ADI-R SC" /
# "ADI-R RRB" and SRS_tscore_self is labelled "SRS" - confirm these labels.
vnames <- c("Age","Mean FD","VIQ","PIQ","FIQ","ADI-R SC","ADI-R RRB",
            "ADI-R Social","ADI-R Communication","ADI-R RRB",
            "ADOS SA CSS","ADOS RRB CSS","SRS","RBS","SSP",
            "Vineland Socialization","Vineland Daily Living Skills",
            "Vineland Communication","Vineland ABC")
# columns of the results table (one row per variable)
cnames <- c("All_Disc.fstat","All_Disc.pval","All_Rep.fstat","All_Rep.pval",
            "SCequalRRB_vs_SCoverRRB_Disc.fstat","SCequalRRB_vs_SCoverRRB_Disc.tstat",
            "SCequalRRB_vs_SCoverRRB_Disc.pval","SCequalRRB_vs_SCoverRRB_Disc.es",
            "SCequalRRB_vs_SCoverRRB_Rep.fstat","SCequalRRB_vs_SCoverRRB_Rep.tstat",
            "SCequalRRB_vs_SCoverRRB_Rep.pval","SCequalRRB_vs_SCoverRRB_Rep.es",
            "SCequalRRB_vs_SCoverRRB.repBF")
output_res <- data.frame(matrix(nrow = length(vars2analyze),ncol = length(cnames)))
colnames(output_res) <- cnames
rownames(output_res) <- vars2analyze
output_res$varNames <- vars2analyze

# plot colours do not depend on the variable, so compute them once
colors2use <- get_ggColorHue(3)

for (ivar in seq_along(vars2analyze)){
  y_var <- vars2analyze[ivar]
  # print(y_var)

  # the same fixed/random formulas are used for all four fits of this variable
  fx_form <- as.formula(sprintf("%s ~ %s",y_var,"subgrp"))
  rx_form <- as.formula(sprintf("~ 1|%s","Centre"))

  #----------------------------------------------------------------------------
  # Discovery, all groups: omnibus test of subgrp
  df4mod <- subset(df2use, df2use$dataset=="Discovery")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var,"All_Disc.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"All_Disc.pval"] <- res["subgrp","p-value"]

  #----------------------------------------------------------------------------
  # Replication, all groups: omnibus test of subgrp
  df4mod <- subset(df2use, df2use$dataset=="Replication")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var,"All_Rep.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"All_Rep.pval"] <- res["subgrp","p-value"]

  #----------------------------------------------------------------------------
  # Discovery, SC_equal_RRB vs SC_over_RRB only
  df4mod <- subset(df2use, df2use$dataset=="Discovery" & !is.element(df2use$subgrp,c("TD","RRB_over_SC")))
  # group sizes for the replication Bayes factor
  # NOTE(review): these count every row of the group, including rows where
  # y_var is missing and which na.omit removes from the model - confirm that
  # this is the intended n.
  n1 <- sum(df4mod$subgrp=="SC_equal_RRB")
  m1 <- sum(df4mod$subgrp=="SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.pval"] <- res["subgrp","p-value"]
  # row 2 of the fixed-effects table is the subgrp coefficient (row 1 = intercept)
  res <- summary(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.tstat"] <- res$tTable[2,"t-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.es"] <- cohens_d(df4mod[df4mod$subgrp=="SC_equal_RRB",y_var],
                                                                  df4mod[df4mod$subgrp=="SC_over_RRB",y_var])

  #----------------------------------------------------------------------------
  # Replication, SC_equal_RRB vs SC_over_RRB only
  df4mod <- subset(df2use, df2use$dataset=="Replication" & !is.element(df2use$subgrp,c("TD","RRB_over_SC")))
  n2 <- sum(df4mod$subgrp=="SC_equal_RRB")
  m2 <- sum(df4mod$subgrp=="SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.pval"] <- res["subgrp","p-value"]
  res <- summary(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.tstat"] <- res$tTable[2,"t-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.es"] <- cohens_d(df4mod[df4mod$subgrp=="SC_equal_RRB",y_var],
                                                                 df4mod[df4mod$subgrp=="SC_over_RRB",y_var])

  #----------------------------------------------------------------------------
  # Replication Bayes Factor
  # tobs is the Discovery t-statistic and trep the Replication t-statistic.
  # (Previously the Discovery value was passed for both, so the Bayes factor
  # never used the Replication result.)
  res_bf <- BFSALL(tobs = output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.tstat"],
                   trep = output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.tstat"],
                   n1 = n1,
                   n2 = n2,
                   m1 = m1,
                   m2 = m2,
                   sample = 2,
                   Type = 'ALL')
  output_res[y_var,"SCequalRRB_vs_SCoverRRB.repBF"] <- res_bf["Replication BF","Replication 1"]

  # make a plot: one panel per dataset, RRB_over_SC group left out
  df4plot <- subset(df2use, df2use$subgrp!="RRB_over_SC")
  p <- ggplot(data = df4plot, aes_string(x = "subgrp", y = y_var, colour = "subgrp")) + facet_grid(. ~ dataset)
  p <- p + geom_jitter() + geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA)
  p <- p + ylab(vnames[ivar]) + xlab("Group") +
    scale_x_discrete(labels=c("SC_equal_RRB"="SC=RRB","SC_over_RRB"="SC>RRB","TD"="TD")) +
    scale_colour_manual(values = c(colors2use[2:3],"grey60")) + guides(colour=FALSE) +
    theme(text = element_text(size=fontSize-5),
          axis.text.x = element_text(size=fontSize-5),
          axis.text.y = element_text(size=fontSize-5))
  print(p)
}

# keep the Vineland ABC and Daily Living Skills effect sizes in the "0.6" row
# of vabc / vabc_dls (objects created outside this block)
# NOTE(review): "0.6" presumably labels the z threshold used for this section -
# confirm against where vabc is created.
vabc["0.6","Discovery"] <- output_res["vabsabcabc_standard","SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc["0.6","Replication"] <- output_res["vabsabcabc_standard","SCequalRRB_vs_SCoverRRB_Rep.es"]
vabc_dls["0.6","Discovery"] <- output_res["vabsdscoresd_dss","SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc_dls["0.6","Replication"] <- output_res["vabsdscoresd_dss","SCequalRRB_vs_SCoverRRB_Rep.es"]
output_res
## All_Disc.fstat All_Disc.pval All_Rep.fstat All_Rep.pval
## age 0.2764449 8.423702e-01 0.5333990 6.597850e-01
## meanFD 2.8411213 3.849901e-02 0.2732541 8.446636e-01
## viq_all 2.0606131 1.060655e-01 1.9418668 1.234238e-01
## piq_all 1.5443272 2.036586e-01 1.0637207 3.651305e-01
## fsiq4_all 2.2139914 8.707432e-02 1.4197600 2.375609e-01
## A_pct_severity 31.1339556 9.934498e-12 44.9753913 1.776357e-15
## B_pct_severity 29.4477380 3.111467e-11 4.2452557 1.641808e-02
## ADI_social_total 1.2910958 2.785534e-01 6.5424344 1.975574e-03
## ADI_communication_total 0.4871784 6.155008e-01 10.4729563 6.165916e-05
## ADI_RRB_total 28.6351580 5.433998e-11 0.2482669 7.805294e-01
## ados_2_SA_CSS 2.5786048 7.994439e-02 2.3961729 9.533984e-02
## ados_2_RRB_CSS 0.7229015 4.873759e-01 0.2605255 7.710741e-01
## SRS_tscore_self 36.7695315 0.000000e+00 32.9427493 1.776357e-15
## RBS_total 18.4871548 1.991074e-10 13.3371250 7.360727e-08
## SSP_total 30.4728775 5.551115e-15 22.8487987 6.286305e-12
## vabsdscoress_dss 23.7174717 1.278755e-12 26.2420515 9.459100e-14
## vabsdscoresd_dss 12.2686164 3.219601e-07 10.2000437 3.685760e-06
## vabsdscoresc_dss 8.9789554 1.655782e-05 8.8683692 1.864441e-05
## vabsabcabc_standard 17.6024053 7.799775e-10 17.7427118 6.433748e-10
## SCequalRRB_vs_SCoverRRB_Disc.fstat
## age 1.208723e-04
## meanFD 1.613557e+00
## viq_all 1.573745e-02
## piq_all 3.467867e-02
## fsiq4_all 8.884585e-04
## A_pct_severity 2.623303e+01
## B_pct_severity 4.267055e+01
## ADI_social_total 4.106023e-01
## ADI_communication_total 4.859287e-01
## ADI_RRB_total 3.085261e+01
## ados_2_SA_CSS 4.391497e-01
## ados_2_RRB_CSS 6.978527e-01
## SRS_tscore_self 1.357964e+00
## RBS_total 1.031475e-01
## SSP_total 3.557144e-01
## vabsdscoress_dss 4.187447e+00
## vabsdscoresd_dss 3.343510e-01
## vabsdscoresc_dss 3.553608e-03
## vabsabcabc_standard 5.307453e-01
## SCequalRRB_vs_SCoverRRB_Disc.tstat
## age 0.01099419
## meanFD -1.27025873
## viq_all 0.12544898
## piq_all -0.18622209
## fsiq4_all -0.02980702
## A_pct_severity 5.12181909
## B_pct_severity -6.53227014
## ADI_social_total 0.64078259
## ADI_communication_total 0.69708587
## ADI_RRB_total -5.55451285
## ados_2_SA_CSS 0.66268368
## ados_2_RRB_CSS -0.83537579
## SRS_tscore_self 1.16531709
## RBS_total 0.32116585
## SSP_total 0.59641799
## vabsdscoress_dss -2.04632533
## vabsdscoresd_dss -0.57823091
## vabsdscoresc_dss -0.05961215
## vabsabcabc_standard -0.72852266
## SCequalRRB_vs_SCoverRRB_Disc.pval
## age 9.912471e-01
## meanFD 2.065565e-01
## viq_all 9.003910e-01
## piq_all 8.526045e-01
## fsiq4_all 9.762726e-01
## A_pct_severity 1.230234e-06
## B_pct_severity 1.838366e-09
## ADI_social_total 5.229388e-01
## ADI_communication_total 4.871559e-01
## ADI_RRB_total 1.820225e-07
## ados_2_SA_CSS 5.088946e-01
## ados_2_RRB_CSS 4.052847e-01
## SRS_tscore_self 2.495322e-01
## RBS_total 7.487757e-01
## SSP_total 5.528764e-01
## vabsdscoress_dss 4.313077e-02
## vabsdscoresd_dss 5.643120e-01
## vabsdscoresc_dss 9.525737e-01
## vabsabcabc_standard 4.678705e-01
## SCequalRRB_vs_SCoverRRB_Disc.es
## age -0.0020486507
## meanFD 0.2366987382
## viq_all -0.0531943418
## piq_all 0.0370911813
## fsiq4_all 0.0034257024
## A_pct_severity -0.7647298789
## B_pct_severity 1.1922722865
## ADI_social_total -0.0261197588
## ADI_communication_total 0.0354493213
## ADI_RRB_total 1.0690358682
## ados_2_SA_CSS -0.0951001837
## ados_2_RRB_CSS 0.1585085466
## SRS_tscore_self -0.0638805495
## RBS_total -0.0013559914
## SSP_total -0.1555410869
## vabsdscoress_dss 0.2696617895
## vabsdscoresd_dss 0.0523634891
## vabsdscoresc_dss 0.0000932107
## vabsabcabc_standard 0.1137339593
## SCequalRRB_vs_SCoverRRB_Rep.fstat
## age 0.1025825
## meanFD 1.2269701
## viq_all 1.7164429
## piq_all 1.8155303
## fsiq4_all 1.5264451
## A_pct_severity 71.9443659
## B_pct_severity 4.1152326
## ADI_social_total 11.2111628
## ADI_communication_total 12.7860799
## ADI_RRB_total 0.1987732
## ados_2_SA_CSS 3.0812381
## ados_2_RRB_CSS 0.4605097
## SRS_tscore_self 0.9242800
## RBS_total 3.1219435
## SSP_total 0.1438166
## vabsdscoress_dss 20.5316013
## vabsdscoresd_dss 8.7578258
## vabsdscoresc_dss 10.7802375
## vabsabcabc_standard 22.1882695
## SCequalRRB_vs_SCoverRRB_Rep.tstat
## age -0.3202850
## meanFD 1.1076868
## viq_all -1.3101309
## piq_all -1.3474162
## fsiq4_all -1.2354939
## A_pct_severity 8.4820025
## B_pct_severity -2.0286036
## ADI_social_total 3.3483075
## ADI_communication_total 3.5757628
## ADI_RRB_total -0.4458398
## ados_2_SA_CSS 1.7553456
## ados_2_RRB_CSS -0.6786086
## SRS_tscore_self 0.9613948
## RBS_total 1.7669022
## SSP_total -0.3792315
## vabsdscoress_dss -4.5311810
## vabsdscoresd_dss -2.9593624
## vabsdscoresc_dss -3.2833272
## vabsabcabc_standard -4.7104426
## SCequalRRB_vs_SCoverRRB_Rep.pval
## age 7.493002e-01
## meanFD 2.701762e-01
## viq_all 1.926962e-01
## piq_all 1.803849e-01
## fsiq4_all 2.190803e-01
## A_pct_severity 6.128431e-14
## B_pct_severity 4.467566e-02
## ADI_social_total 1.081815e-03
## ADI_communication_total 5.014772e-04
## ADI_RRB_total 6.565032e-01
## ados_2_SA_CSS 8.181752e-02
## ados_2_RRB_CSS 4.987259e-01
## SRS_tscore_self 3.404866e-01
## RBS_total 8.015024e-02
## SSP_total 7.055875e-01
## vabsdscoress_dss 1.497522e-05
## vabsdscoresd_dss 3.782327e-03
## vabsdscoresc_dss 1.375857e-03
## vabsabcabc_standard 7.329985e-06
## SCequalRRB_vs_SCoverRRB_Rep.es
## age 0.05779712
## meanFD -0.19988792
## viq_all 0.28830307
## piq_all 0.31291930
## fsiq4_all 0.29154308
## A_pct_severity -1.39474985
## B_pct_severity 0.42698121
## ADI_social_total -0.54164153
## ADI_communication_total -0.56267039
## ADI_RRB_total 0.14227907
## ados_2_SA_CSS -0.31500221
## ados_2_RRB_CSS 0.10825686
## SRS_tscore_self -0.25035851
## RBS_total -0.27897124
## SSP_total 0.07684736
## vabsdscoress_dss 0.74280170
## vabsdscoresd_dss 0.46132860
## vabsdscoresc_dss 0.62106305
## vabsabcabc_standard 0.77122945
## SCequalRRB_vs_SCoverRRB.repBF varNames
## age 6.960942e-01 age
## meanFD 1.553721e+00 meanFD
## viq_all 7.004006e-01 viq_all
## piq_all 7.079673e-01 piq_all
## fsiq4_all 6.959276e-01 fsiq4_all
## A_pct_severity 1.074064e+05 A_pct_severity
## B_pct_severity 6.884517e+07 B_pct_severity
## ADI_social_total 8.545985e-01 ADI_social_total
## ADI_communication_total 8.878127e-01 ADI_communication_total
## ADI_RRB_total 7.048704e+05 ADI_RRB_total
## ados_2_SA_CSS 8.666988e-01 ados_2_SA_CSS
## ados_2_RRB_CSS 9.852443e-01 ados_2_RRB_CSS
## SRS_tscore_self 1.370332e+00 SRS_tscore_self
## RBS_total 7.325785e-01 RBS_total
## SSP_total 8.321138e-01 SSP_total
## vabsdscoress_dss 5.491854e+00 vabsdscoress_dss
## vabsdscoresd_dss 8.226833e-01 vabsdscoresd_dss
## vabsdscoresc_dss 6.967464e-01 vabsdscoresc_dss
## vabsabcabc_standard 9.064006e-01 vabsabcabc_standard
#------------------------------------------------------------------------------
# Z-score threshold to use for subtyping
z_thresh <- 0.7
# vars2use = c("dbaes_atotal","dbaes_btotal")

# Discovery: difference score (first minus second variable in vars2use),
# z-scored against its own mean and sd, then cut into subtypes at +/- z_thresh
ds_disc <- Dverbal_Discovery[[vars2use[1]]] - Dverbal_Discovery[[vars2use[2]]]
mean2use <- mean(ds_disc)
sd2use <- sd(ds_disc)
Dverbal_Discovery <- make_subtype(
  data2use = Dverbal_Discovery,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)

# Replication: same procedure, using the Replication mean and sd
ds_rep <- Dverbal_Replication[[vars2use[1]]] - Dverbal_Replication[[vars2use[2]]]
mean2use <- mean(ds_rep)
sd2use <- sd(ds_rep)
Dverbal_Replication <- make_subtype(
  data2use = Dverbal_Replication,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)

#------------------------------------------------------------------------------
# Cross-tabulation of subtype (rows) by sex (columns)
# Discovery
table(Dverbal_Discovery$z_ds_group, Dverbal_Discovery$sex)
##
## F M
## RRB_over_SC 42 169
## SC_equal_RRB 112 365
## SC_over_RRB 43 158
# Pearson chi-squared test of association between subtype and sex
cs_res <- chisq.test(x = Dverbal_Discovery$z_ds_group, y = Dverbal_Discovery$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Discovery$z_ds_group and Dverbal_Discovery$sex
## X-squared = 1.1723, df = 2, p-value = 0.5565
# Replication: subtype (rows) by sex (columns)
table(Dverbal_Replication$z_ds_group, Dverbal_Replication$sex)
##
## F M
## RRB_over_SC 42 170
## SC_equal_RRB 111 374
## SC_over_RRB 43 150
# Pearson chi-squared test of association between subtype and sex
cs_res <- chisq.test(x = Dverbal_Replication$z_ds_group, y = Dverbal_Replication$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Replication$z_ds_group and Dverbal_Replication$sex
## X-squared = 0.82192, df = 2, p-value = 0.663
#------------------------------------------------------------------------------
# Per-subtype descriptive statistics
# Discovery: pull the columns of interest, add age = interview_age / 12,
# then summarise each column within every level of z_ds_group
df2use <- Dverbal_Discovery[, c("subjectkey", "dataset_id", "collection_id",
                                "interview_age", "sex", "z_ds_group",
                                "ados_age", "ados_sa_css", "ados_rrb_css",
                                "iq", "dbaes_atotal", "dbaes_btotal")]
df2use$age <- df2use$interview_age / 12
cols2use <- c("z_ds_group", "sex", "age",
              "ados_age", "ados_sa_css", "ados_rrb_css",
              "iq", "dbaes_atotal", "dbaes_btotal")
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 211 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 211 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 211 9.96 4.14 9.50 9.61 3.34 2.00 27.17 25.17
## ados_age 4 27 93.59 38.82 86.00 92.04 38.55 37.00 171.00 134.00
## ados_sa_css 5 27 6.52 2.38 7.00 6.61 2.97 2.00 10.00 8.00
## ados_rrb_css 6 27 7.59 2.08 8.00 7.83 1.48 1.00 10.00 9.00
## iq 7 55 103.16 16.12 105.00 103.76 14.83 54.00 139.00 85.00
## dbaes_atotal 8 211 0.22 0.11 0.22 0.22 0.12 0.00 0.51 0.51
## dbaes_btotal 9 211 0.47 0.12 0.46 0.47 0.13 0.14 0.79 0.65
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 0.97 1.63 0.28
## ados_age 0.43 -0.99 7.47
## ados_sa_css -0.35 -1.07 0.46
## ados_rrb_css -1.22 1.58 0.40
## iq -0.47 0.57 2.17
## dbaes_atotal 0.15 -0.64 0.01
## dbaes_btotal 0.08 -0.41 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 477 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 477 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 477 9.13 5.62 8.00 8.34 4.82 0 45.75 45.75 1.98
## ados_age 4 81 80.81 45.01 64.00 75.60 38.55 27 202.00 175.00 0.78
## ados_sa_css 5 81 6.77 2.08 7.00 6.85 1.48 1 10.00 9.00 -0.33
## ados_rrb_css 6 81 7.65 2.35 8.00 8.06 1.48 1 10.00 9.00 -1.55
## iq 7 119 104.25 18.51 107.00 105.64 17.79 42 138.00 96.00 -0.82
## dbaes_atotal 8 477 0.30 0.13 0.30 0.30 0.14 0 0.67 0.67 0.00
## dbaes_btotal 9 477 0.31 0.13 0.31 0.31 0.14 0 0.68 0.68 -0.13
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 6.55 0.26
## ados_age -0.54 5.00
## ados_sa_css -0.30 0.23
## ados_rrb_css 2.12 0.26
## iq 0.95 1.70
## dbaes_atotal -0.34 0.01
## dbaes_btotal -0.26 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 201 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 201 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 201 7.31 5.08 5.75 6.41 2.97 1.67 37.33 35.67
## ados_age 4 45 69.80 35.62 65.00 63.81 25.20 30.00 172.00 142.00
## ados_sa_css 5 45 7.51 1.55 7.00 7.54 1.48 4.00 10.00 6.00
## ados_rrb_css 6 45 8.18 1.70 8.00 8.35 1.48 1.00 10.00 9.00
## iq 7 25 104.00 19.93 111.00 105.29 13.34 40.00 140.00 100.00
## dbaes_atotal 8 201 0.48 0.13 0.47 0.47 0.14 0.16 0.87 0.71
## dbaes_btotal 9 201 0.22 0.10 0.22 0.22 0.10 0.00 0.57 0.57
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.44 8.14 0.36
## ados_age 1.54 1.89 5.31
## ados_sa_css -0.04 -0.77 0.23
## ados_rrb_css -1.64 4.86 0.25
## iq -1.08 2.07 3.99
## dbaes_atotal 0.21 -0.21 0.01
## dbaes_btotal 0.30 -0.02 0.01
# Replication: same per-subtype summary as for Discovery
df2use <- Dverbal_Replication[, c("subjectkey", "dataset_id", "collection_id",
                                  "interview_age", "sex", "z_ds_group",
                                  "ados_age", "ados_sa_css", "ados_rrb_css",
                                  "iq", "dbaes_atotal", "dbaes_btotal")]
df2use$age <- df2use$interview_age / 12
cols2use <- c("z_ds_group", "sex", "age",
              "ados_age", "ados_sa_css", "ados_rrb_css",
              "iq", "dbaes_atotal", "dbaes_btotal")
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 212 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 212 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 212 9.88 4.73 9.33 9.40 4.14 3.00 28.58 25.58
## ados_age 4 17 84.00 51.33 75.00 80.20 54.86 36.00 189.00 153.00
## ados_sa_css 5 17 6.53 2.21 7.00 6.53 2.97 3.00 10.00 7.00
## ados_rrb_css 6 17 7.18 1.51 7.00 7.13 1.48 5.00 10.00 5.00
## iq 7 63 103.48 18.70 105.00 104.24 14.83 57.00 152.00 95.00
## dbaes_atotal 8 212 0.23 0.11 0.22 0.22 0.11 0.01 0.61 0.60
## dbaes_btotal 9 212 0.49 0.13 0.48 0.48 0.11 0.21 0.93 0.72
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 1.18 2.07 0.32
## ados_age 0.64 -1.09 12.45
## ados_sa_css -0.18 -1.35 0.54
## ados_rrb_css 0.33 -0.64 0.37
## iq -0.30 0.52 2.36
## dbaes_atotal 0.63 0.74 0.01
## dbaes_btotal 0.48 0.77 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 485 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 485 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 485 8.78 5.19 7.75 8.04 4.57 0 33.83 33.83 1.67
## ados_age 4 96 81.18 38.52 73.00 77.08 42.25 35 196.00 161.00 0.82
## ados_sa_css 5 96 6.85 2.03 7.00 6.90 1.48 2 10.00 8.00 -0.06
## ados_rrb_css 6 96 7.33 2.44 8.00 7.71 1.48 1 10.00 9.00 -1.32
## iq 7 92 106.80 15.98 108.00 106.42 14.08 64 146.00 82.00 0.11
## dbaes_atotal 8 485 0.31 0.14 0.31 0.31 0.13 0 0.78 0.78 0.06
## dbaes_btotal 9 485 0.32 0.14 0.32 0.32 0.14 0 0.81 0.81 0.11
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 3.90 0.24
## ados_age 0.04 3.93
## ados_sa_css -0.73 0.21
## ados_rrb_css 1.14 0.25
## iq -0.01 1.67
## dbaes_atotal -0.03 0.01
## dbaes_btotal 0.09 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 193 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 193 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 193 8.11 6.31 6.33 6.83 3.71 2.08 40.92 38.83
## ados_age 4 40 71.15 28.16 68.00 67.28 23.72 30.00 141.00 111.00
## ados_sa_css 5 40 7.25 1.84 7.00 7.28 1.48 3.00 10.00 7.00
## ados_rrb_css 6 40 7.53 2.23 8.00 7.78 1.48 1.00 10.00 9.00
## iq 7 31 111.52 18.36 115.00 112.60 16.31 62.00 146.00 84.00
## dbaes_atotal 8 193 0.49 0.13 0.49 0.48 0.13 0.14 0.96 0.82
## dbaes_btotal 9 193 0.23 0.12 0.23 0.23 0.13 0.00 0.50 0.50
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.21 5.59 0.45
## ados_age 1.04 0.50 4.45
## ados_sa_css -0.07 -0.88 0.29
## ados_rrb_css -1.11 1.25 0.35
## iq -0.60 -0.04 3.30
## dbaes_atotal 0.28 0.34 0.01
## dbaes_btotal 0.02 -0.66 0.01
#------------------------------------------------------------------------------
# One-way ANOVA: does interview_age differ between the z-score subtypes?
# Discovery
mod2use <- lm(interview_age ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 111058 55529 14.339 7.433e-07 ***
## Residuals 886 3431008 3872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: same model, fitted to the Replication set
mod2use <- lm(interview_age ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 48109 24054.6 5.8372 0.00303 **
## Residuals 887 3655255 4120.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA: does SC (dbaes_atotal) differ between the z-score subtypes?
# Discovery
mod2use <- lm(dbaes_atotal ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.1881 3.5940 213.96 < 2.2e-16 ***
## Residuals 886 14.8827 0.0168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: same model, fitted to the Replication set
mod2use <- lm(dbaes_atotal ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.294 3.6470 212.52 < 2.2e-16 ***
## Residuals 887 15.221 0.0172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA: does RRB (dbaes_btotal) differ between the z-score subtypes?
# Discovery
mod2use <- lm(dbaes_btotal ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.6145 3.3073 213.73 < 2.2e-16 ***
## Residuals 886 13.7103 0.0155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: same model, fitted to the Replication set
mod2use <- lm(dbaes_btotal ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.9404 3.4702 200.03 < 2.2e-16 ***
## Residuals 887 15.3882 0.0173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA: does ADOS SA CSS differ between the z-score subtypes?
# Discovery
mod2use <- lm(ados_sa_css ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 21.94 10.9711 2.7587 0.06659 .
## Residuals 150 596.53 3.9769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: same model, fitted to the Replication set
mod2use <- lm(ados_sa_css ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.37 3.6826 0.9211 0.4003
## Residuals 150 599.69 3.9980
#------------------------------------------------------------------------------
# One-way ANOVA: does ADOS RRB CSS differ between the z-score subtypes?
# Discovery
mod2use <- lm(ados_rrb_css ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 9.30 4.6508 1.0268 0.3607
## Residuals 150 679.42 4.5294
# Replication: same model, fitted to the Replication set
mod2use <- lm(ados_rrb_css ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1.72 0.8622 0.1625 0.8502
## Residuals 150 795.78 5.3052
#------------------------------------------------------------------------------
# One-way ANOVA: does IQ differ between the z-score subtypes?
# Discovery
mod2use <- lm(iq ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 45 22.37 0.0685 0.9338
## Residuals 196 63984 326.45
# Replication: same model, fitted to the Replication set
mod2use <- lm(iq ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1364 682.15 2.268 0.1064
## Residuals 183 55042 300.78
#------------------------------------------------------------------------------
# Scatterplots of SC (x) against RRB (y), points coloured by z-score subtype
maxScores <- c(3, 4)

# Discovery: full plot with legend
p_disc <- ggplot(
  data = Dverbal_Discovery,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)
) +
  geom_point() +
  labs(x = "SC", y = "RRB", title = "NDAR Discovery") +
  ylim(0, 1) +
  xlim(0, 1)

# legend-free copy is the one written to disk; file name carries z_thresh
p1_top_left <- p_disc + guides(colour=FALSE)
ggsave(
  filename = file.path(
    plotpath,
    sprintf("final_NDAR_Disc_scatterplot_z%s.pdf", as.character(z_thresh))
  ),
  plot = p1_top_left
)

# show the plot and the number of subjects per subtype
p_disc
table(Dverbal_Discovery$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 211 477 201
# Pearson correlation between the SC and RRB scores
cor_res <- cor.test(x = Dverbal_Discovery$dbaes_atotal, y = Dverbal_Discovery$dbaes_btotal)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Discovery$dbaes_atotal and Dverbal_Discovery$dbaes_btotal
## t = 6.6829, df = 887, p-value = 4.134e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1554332 0.2806578
## sample estimates:
## cor
## 0.2189469
p_rep = ggplot(data = Dverbal_Replication, aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)) + geom_point() + xlab("SC") + ylab("RRB") + ylim(0,1) + xlim(0,1) + ggtitle("NDAR Replication")
p2_bottom_left = p_rep + guides(colour=FALSE)
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Rep_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p2_bottom_left)
p_rep
table(Dverbal_Replication$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 212 485 193
cor_res = cor.test(Dverbal_Replication$dbaes_atotal, Dverbal_Replication$dbaes_btotal)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Replication$dbaes_atotal and Dverbal_Replication$dbaes_btotal
## t = 7.1459, df = 888, p-value = 1.864e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1700824 0.2943934
## sample estimates:
## cor
## 0.2331903
#------------------------------------------------------------------------------
# Validate the subtype labelling with Discovery as training set and
# Replication as test set.
#   actual labels    : test-set subtypes from the test set's own mean/sd
#   predicted labels : test-set subtypes from the training set's mean/sd
# Accuracy is the proportion of test individuals whose two labels agree.
train_data <- Dverbal_Discovery
test_data <- Dverbal_Replication

# SC - RRB difference scores (the quantity make_subtype() converts to z-scores)
train_diff <- train_data[, vars2use[1]] - train_data[, vars2use[2]]
test_diff <- test_data[, vars2use[1]] - test_data[, vars2use[2]]
train_mean <- mean(train_diff)
train_sd <- sd(train_diff)

# subtypes of each set, normalised by that set's own mean and sd
tmp_train <- make_subtype(
  data2use = train_data,
  z_thresh = z_thresh,
  mean2use = train_mean,
  sd2use = train_sd
)
tmp_test <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = mean(test_diff),
  sd2use = sd(test_diff)
)
#==============================================================================
#==============================================================================
# make labels based on train mean and sd
pred_labels <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = train_mean,
  sd2use = train_sd
)
# leave mean2use / sd2use holding the training-set values, as they did before
mean2use <- train_mean
sd2use <- train_sd

# Fix the level set so the confusion matrix is always 3 x 3 and lines up with
# the hard-coded heatmap labels, even if a subtype happens to be empty.
subtype_levels <- c("RRB_over_SC", "SC_equal_RRB", "SC_over_RRB")
actual_labels <- as.character(tmp_test$z_ds_group)
predicted_labels <- as.character(pred_labels$z_ds_group)
# rows = actual labels, columns = predicted labels
confmat <- table(
  factor(actual_labels, levels = subtype_levels),
  factor(predicted_labels, levels = subtype_levels)
)
# proportion of agreeing labels (equals the diagonal sum of confmat / n)
acc <- mean(actual_labels == predicted_labels)
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#==============================================================================
#==============================================================================
# plot confusion matrix
# The hook shrinks the drawing viewport to the top-right 90% of the page so the
# axis captions can be written into the left and bottom margins afterwards.
setHook(
  "grid.newpage",
  function() {
    pushViewport(viewport(
      x = 1, y = 1, width = 0.9, height = 0.9,
      name = "vp", just = c("right", "top")
    ))
  },
  action = "prepend"
)
# Cell colour = percentage of each actual (row) class; cell text = raw counts.
# pheatmap needs one more break than colours, hence 101 breaks for 100 colours.
pheatmap(
  confmat / rowSums(confmat) * 100,
  display_numbers = confmat,
  color = colorRampPalette(c('white', 'red'))(100),
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize_number = fontSize,
  fontsize_row = fontSize,
  fontsize_col = fontSize,
  labels_row = c("RRB>SC", "SC=RRB", "SC>RRB"),
  labels_col = c("RRB>SC", "SC=RRB", "SC>RRB"),
  angle_col = 90,
  breaks = seq(0, 100, length.out = 101)
)
setHook("grid.newpage", NULL, "replace")
# Columns hold the predicted labels (caption below the heatmap) and rows hold
# the actual labels (caption to the left of the heatmap).
grid::grid.text("Predicted Labels", y = -0.07, gp = gpar(fontsize = fontSize))
grid::grid.text("Actual Labels", x = -0.07, rot = 90, gp = gpar(fontsize = fontSize))
# show accuracy
true_accuracy <- acc
true_accuracy
## [1] 0.9820225
#================================================================================
#================================================================================
# #------------------------------------------------------------------------------
# # Permute subtype labels to examine how well supervised model performs
#
# # nperm = 10000
#
# # make subtypes using z-scores computed from the mean and sd of the training set
# train_data = Dverbal_Discovery
# test_data = Dverbal_Replication
# mean2use = mean(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# sd2use = sd(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# tmp_train = make_subtype(data2use = train_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# mean2use = mean(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# sd2use = sd(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# tmp_test = make_subtype(data2use = test_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# acc = vector(length = nperm)
# for (iperm in 1:nperm){
# # set seed for reproducibility
# set.seed(iperm)
#
# sc_perm = sample(train_data[,vars2use[1]])
# rrb_perm = sample(train_data[,vars2use[2]])
# perm_mean2use = mean(sc_perm - rrb_perm)
# perm_sd2use = sd(sc_perm - rrb_perm)
# # perm_mean2use = mean(train_data[,vars2use[1]] - rrb_perm)
# # perm_sd2use = sd(train_data[,vars2use[1]] - rrb_perm)
# pred_labels = make_subtype(data2use = tmp_test,
# z_thresh = z_thresh,
# mean2use = perm_mean2use,
# sd2use = perm_sd2use)
# confmat = table(tmp_test$z_ds_group,pred_labels$z_ds_group)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/dim(pred_labels)[1]
#
# # compute model
# permuted_labels = sample(tmp_train$z_ds_group)
# mod2use = svm(x = tmp_train[,vars2use], y = permuted_labels)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc[iperm] = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
# #============================================================================
# } # for (iperm in 1:nperm)
#
# df2plot = data.frame(Accuracy = acc)
# p = ggplot(data = df2plot, aes(x = Accuracy)) + geom_histogram() + geom_vline(xintercept=true_accuracy)
# p
#
# # compute p-value
# pval = sum(c(true_accuracy,acc)>=true_accuracy)/(nperm+1)
# pval
#================================================================================
#================================================================================
#------------------------------------------------------------------------------
# Plot difference score Z subtypes in Discovery set
maxScores <- c(3, 4) # not referenced in this section
# Discovery - one point pair per individual (SC and RRB) joined by a line,
# coloured and faceted by subtype.
# adi_total_vars2use is defined outside this section; it has to contain
# dbaes_atotal and dbaes_btotal for the SC / RRB copies below to work.
df2use <- Dverbal_Discovery[, c("subjectkey", adi_total_vars2use)]
# subtype labels come from tmp_train and are attached by row position
# NOTE(review): assumes tmp_train rows are in the same order as Dverbal_Discovery
df2use$subgrp <- factor(tmp_train$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per individual x subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)
# guides(colour = "none") replaces the deprecated guides(color = FALSE); it is
# applied once here, so the saved and the printed plot are the same object.
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p3_middle_top <- p
ggsave(
  filename = file.path(
    plotpath,
    sprintf("final_NDAR_Disc_jitterplot_z%s.pdf", as.character(z_thresh))
  ),
  plot = p3_middle_top
)
p
# Plot difference score Z subtypes in Replication set
maxScores <- c(3, 4) # not referenced in this section
# Replication - one point pair per individual (SC and RRB) joined by a line,
# coloured and faceted by subtype.
# adi_total_vars2use is defined outside this section; it has to contain
# dbaes_atotal and dbaes_btotal for the SC / RRB copies below to work.
df2use <- Dverbal_Replication[, c("subjectkey", adi_total_vars2use)]
# subtype labels come from tmp_test and are attached by row position
# NOTE(review): assumes tmp_test rows are in the same order as Dverbal_Replication
df2use$subgrp <- factor(tmp_test$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per individual x subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)
# guides(colour = "none") replaces the deprecated guides(color = FALSE); it is
# applied once here, so the saved and the printed plot are the same object.
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p4_middle_bottom <- p
ggsave(
  filename = file.path(
    plotpath,
    sprintf("final_NDAR_Rep_jitterplot_z%s.pdf", as.character(z_thresh))
  ),
  plot = p4_middle_bottom
)
p
#------------------------------------------------------------------------------
# Apply NDAR subtypes to EU-AIMS LEAP data
# The z-score normalisation (mean and sd of the SC - RRB difference) is taken
# from the full NDAR sample in tidy_verbal.csv and then applied unchanged to
# the EU-AIMS ASD participants.
Dverbal <- read.csv(file.path(datapath, "tidy_verbal.csv"))
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

# EU-AIMS percent-severity items that make up the two domain scores
sc_items <- c("A1_pct_severity", "A2_pct_severity", "A3_pct_severity")
rrb_items <- c(
  "B1_pct_severity", "B2_pct_severity", "B3_pct_severity", "B4_pct_severity"
)

# keep ASD participants for whom none of the seven items is missing
mask1 <- euaims_data$Diagnosis == "ASD"
mask2 <- !complete.cases(euaims_data[, c(sc_items, rrb_items)])
euaims_data <- subset(euaims_data, mask1 & !mask2)

# domain scores = mean of the item percentages, stored under the NDAR column
# names so make_subtype() can be used on both datasets
euaims_data[, vars2use[1]] <- Reduce(`+`, euaims_data[sc_items]) / 3
euaims_data[, vars2use[2]] <- Reduce(`+`, euaims_data[rrb_items]) / 4

train_data <- Dverbal
test_data <- euaims_data

# normalisation parameters from the NDAR difference scores (NAs ignored)
train_diff <- train_data[, vars2use[1]] - train_data[, vars2use[2]]
mean2use <- mean(train_diff, na.rm = TRUE)
sd2use <- sd(train_diff, na.rm = TRUE)
c(mean2use, sd2use)
## [1] -0.01045243 0.19482749

# label both datasets with the same NDAR-derived mean and sd
tmp_train <- make_subtype(
  data2use = train_data,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
tmp_test <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#===========================================================================
#===========================================================================
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#
# tmp_test$svm_pred_labels = pred_labels
#
# # plot confusion matrix
# setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
# pheatmap(confmat/rowSums(confmat)*100, display_numbers = confmat, color = colorRampPalette(c('white','red'))(100), cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = fontSize, fontsize_row = fontSize, fontsize_col = fontSize,labels_row = c("RRB>SC","SC=RRB","SC>RRB"),labels_col = c("RRB>SC","SC=RRB","SC>RRB"),angle_col=90,
# breaks= seq(0,100, length=100))
# setHook("grid.newpage", NULL, "replace")
# grid::grid.text("Actual Labels", y=-0.07, gp=gpar(fontsize=fontSize))
# grid::grid.text("Predicted Labels", x=-0.07, rot=90, gp=gpar(fontsize=fontSize))
#===========================================================================
#===========================================================================
# SC vs RRB scatterplots coloured by subtype: the full NDAR sample first, then
# the EU-AIMS participants labelled with the NDAR-derived thresholds
p1 <- ggplot(
  data = tmp_train,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 0.8) +
  xlim(0, 0.8) +
  ggtitle("NDAR ALL")
p1
p2 <- ggplot(
  data = tmp_test,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 0.8) +
  xlim(0, 0.8) +
  ggtitle("EU-AIMS with Groups from NDAR ALL")
p2
#===========================================================================
#===========================================================================
# p3 = ggplot(data = tmp_test, aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(pred_labels))) + geom_point() + xlab("SC") + ylab("RRB") + ylim(0,0.8) + xlim(0,0.8) + ggtitle("EU-AIMS")
# p5_bottom_right = p3 + guides(colour=FALSE)
# ggsave(filename = file.path(plotpath, sprintf("final_EUAIMS_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p5_bottom_right)
# p3
#===========================================================================
#===========================================================================
# Save the EU-AIMS LEAP rows, including the z_ds and z_ds_group columns added
# by make_subtype(), to a CSV in datapath; the z threshold is part of the name
write.csv(
  x = tmp_test,
  file = file.path(
    datapath,
    sprintf("tidy_euaims_NDAR_subtypes_diffscore_z%s.csv", as.character(z_thresh))
  )
)
#===========================================================================
#===========================================================================
# # Make final plot
# p_final = p1_top_left + p3_middle_top + plot_spacer() + p2_bottom_left + p4_middle_bottom + p5_bottom_right + plot_layout(nrow=3, ncol=3, widths = c(4,4,4), heights = c(8,8,8))
# ggsave(filename = file.path(plotpath, sprintf("final_NDAR_EUAIMS_subtypes_plot_z%s.pdf",as.character(z_thresh))), plot = p_final)
# p_final
#===========================================================================
#===========================================================================
#------------------------------------------------------------------------------
# Integrate subtypes with rest of EU-AIMS LEAP data
# Builds one table (data2write) holding TD and ASD participants that also
# appear in the rsfMRI preprocessing sheet with notes == "ok", each carrying a
# subgrp label ("TD" or the NDAR-derived subtype).
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

# TD participants: columns 2 to 6 of the EU-AIMS table, plus empty severity
# scores and a fixed "TD" subgroup label
# NOTE(review): columns are picked by position; they need to match the first
# five names in cols2use for the rbind() further down - confirm
td_df <- subset(euaims_data, euaims_data$Diagnosis == "TD", select = 2:6)
td_df$A_pct_severity <- NA_real_
td_df$B_pct_severity <- NA_real_
td_df$subgrp <- "TD"
#===========================================================================
#===========================================================================
# cols2use = c("subid","age","Centre","Schedule","Diagnosis","dbaes_atotal","dbaes_btotal","svm_pred_labels")
cols2use <- c(
  "subid", "age", "Centre", "Schedule", "Diagnosis",
  "dbaes_atotal", "dbaes_btotal", "z_ds_group"
)
#===========================================================================
#===========================================================================
# ASD participants: same columns, with the last three renamed to match td_df
tmp_asd <- tmp_test[, cols2use]
colnames(tmp_asd)[6:8] <- c("A_pct_severity", "B_pct_severity", "subgrp")
asd_df <- tmp_asd

# rsfMRI preprocessing sheet; only rows whose notes column equals "ok" are kept
# NOTE(review): absolute, machine-specific path - will not resolve elsewhere
fname <- "/Users/mlombardo/Dropbox/euaims/data/rsfmri_preproc/euaims_preproc.xlsx"
pp_data <- read_excel(fname)
mask <- pp_data$notes == "ok"
pp_data <- subset(pp_data, mask)

# attach sex from the preprocessing sheet; the merge also drops anyone who is
# not in that sheet
sex_lookup <- pp_data[, c("subid", "sex")]
asd_df <- merge(sex_lookup, asd_df, by = "subid")
td_df <- merge(sex_lookup, td_df, by = "subid")
all_data <- rbind(td_df, asd_df)

# age and sex exist in both inputs, so merge() suffixes them (.x = pp_data,
# .y = all_data); pick the versions to use under the plain names
data2write <- merge(pp_data, all_data, by = "subid")
data2write$age <- data2write$age.x
data2write$sex <- data2write$sex.y

# subgroup counts by schedule, centre and sex
print(table(data2write$Schedule, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## A 7 36 37 78
## B 1 45 39 83
## C 4 27 32 59
## D 1 10 27 23
print(table(data2write$Centre, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## CAMBRIDGE 4 27 11 29
## KINGS_COLLEGE 7 49 50 78
## MANNHEIM 0 0 0 34
## NIJMEGEN 2 35 56 64
## UTRECHT 0 7 18 38
print(table(data2write$sex, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Female 5 30 36 88
## Male 8 88 99 155
# DSM-5 - find best split that balances participants across sites
a <- findBestSplit(asd_df, seed_range = c(172342)) #,300001:500000))
## [1] 172342
# seeds whose (non-missing) discrepancy equals the smallest one observed
is_best <- !is.na(a$discrepancy) &
  a$discrepancy == min(a$discrepancy, na.rm = TRUE)
best_seeds <- a$seed[is_best]
print(best_seeds)
## [1] 172342

# Split datasets -------------------------------------------------------------
# Each schedule (A-D) is split on its own with the same seed. In the list
# returned by SplitDatasetsBySex(), element 2 is used as the Discovery half
# and element 1 as the Replication half.
rngSeed <- best_seeds[1]
schedule_splits <- list()
for (sched in c("A", "B", "C", "D")) {
  dset2use <- subset(asd_df, asd_df$Schedule == sched)
  tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
  schedule_splits[[sched]] <- tmp_d
}
A_Discovery <- schedule_splits[["A"]][[2]]
A_Replication <- schedule_splits[["A"]][[1]]
B_Discovery <- schedule_splits[["B"]][[2]]
B_Replication <- schedule_splits[["B"]][[1]]
C_Discovery <- schedule_splits[["C"]][[2]]
C_Replication <- schedule_splits[["C"]][[1]]
D_Discovery <- schedule_splits[["D"]][[2]]
D_Replication <- schedule_splits[["D"]][[1]]

# stack the per-schedule halves
df_Disc <- rbind(A_Discovery, B_Discovery, C_Discovery, D_Discovery)
df_Rep <- rbind(A_Replication, B_Replication, C_Replication, D_Replication)

# Schedule x Centre counts in each half
a <- table(df_Disc$Schedule, df_Disc$Centre)
print(a)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 6 17 10 7
## B 9 16 14 3
## C 6 9 14 2
## D 0 10 10 0
b <- table(df_Rep$Schedule, df_Rep$Centre)
print(b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 7 18 10 5
## B 8 17 13 5
## C 6 10 13 3
## D 0 9 9 0
# cell-wise difference between the halves, then its total absolute size
print(a - b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A -1 -1 0 2
## B 1 -1 1 -2
## C 0 -1 1 -1
## D 0 1 1 0
print(sum(abs(a - b)))
## [1] 14
# Tag each row of data2write with the half its subid was assigned to; rows in
# neither half keep NA
data2write$dataset <- NA
mask <- data2write$subid %in% df_Disc$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% df_Rep$subid
data2write$dataset[mask] <- "Replication"

# ASD participants of each half
asd_Disc <- subset(
  data2write,
  data2write$dataset == "Discovery" & data2write$Diagnosis == "ASD"
)
asd_Rep <- subset(
  data2write,
  data2write$dataset == "Replication" & data2write$Diagnosis == "ASD"
)
# # find which seed gives best TD age-match -------------------------------------
# seeds = 1:1000
# pvals = data.frame(matrix(nrow = length(seeds), ncol = 2))
# for (i in 1:length(seeds)) {
# res = findTDAgeMatch(data2write, seed_range = c(seeds[i],seeds[i]))
# td_Disc_matched = res[[2]]
# td_Rep_matched = res[[1]]
# tres = t.test(td_Disc_matched$age, asd_Disc$age)
# pvals[i,1] = tres$p.value
# tres = t.test(td_Rep_matched$age, asd_Rep$age)
# pvals[i,2] = tres$p.value
# #print(i)
# }
# a = sort.int(pvals[,1], decreasing = TRUE, index.return = TRUE)
# b=pvals[a$ix,]
# TD age matching with a fixed seed; in the returned list, element 2 is used
# as the Discovery TD group and element 1 as the Replication TD group
seed2use <- 929
res <- findTDAgeMatch(data2write, seed_range = rep(seed2use, 2))
td_Disc_matched <- res[[2]]
td_Rep_matched <- res[[1]]

# record the assignment of the matched TD participants in data2write
mask <- data2write$subid %in% td_Disc_matched$subid
data2write[mask, "dataset"] <- "Discovery"
mask <- data2write$subid %in% td_Rep_matched$subid
data2write[mask, "dataset"] <- "Replication"
print(table(data2write$dataset, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Discovery 10 56 67 121
## Replication 3 62 68 122

# write the combined table to the project root; the z threshold is part of
# the file name
fname2save <- here(
  sprintf(
    "asd_subgrp_data_rsfmri_ALL_DSM5_diffzscoreGrps_z%s.csv",
    as.character(z_thresh)
  )
)
write.csv(data2write, file = fname2save)
#------------------------------------------------------------------------------
# Descriptive stats
# Per subgroup x dataset summary (psych::describeBy) of age, head motion, IQ
# and the clinical measures listed below.
measure_cols <- c(
  "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore", "SRS_tscore_self",
  "RBS_total", "SSP_total",
  "vabsdscoresc_dss", "vabsdscoresd_dss", "vabsdscoress_dss",
  "vabsabcabc_standard"
)
id_cols <- c(
  "subid", "Diagnosis", "dataset", "Centre", "meanFD", "age", "sex", "subgrp"
)
df2use <- data2write[, c(id_cols, measure_cols)]

# 999 and 777 are turned into NA in every selected column
mask <- df2use == 999 | df2use == 777
df2use[mask] <- NA

# age is divided by 365 (presumably days to years - confirm against the data)
df2use$age <- df2use$age / 365

# NOTE(review): the grouping columns are also part of x, which is where the
# NaN/Inf "dataset*" and "subgrp*" rows in the printed result come from
cols2use <- c("dataset", "subgrp", "age", "meanFD", measure_cols)
res <- describeBy(x = df2use[, cols2use], group = c("subgrp", "dataset"))
res
##
## Descriptive statistics by group
## subgrp: RRB_over_SC
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 10 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 10 NaN NA NA NaN NA Inf -Inf
## age 3 10 18.01 6.59 21.73 18.53 3.30 7.89 24.03
## meanFD 4 10 0.26 0.32 0.18 0.18 0.07 0.06 1.14
## viq_all 5 10 98.49 16.82 101.00 98.50 15.57 73.00 123.85
## piq_all 6 10 98.30 14.88 99.50 100.62 11.86 64.00 114.00
## fsiq4_all 7 10 98.50 14.56 99.00 99.12 15.57 74.00 118.01
## A_pct_severity 8 10 0.20 0.10 0.16 0.19 0.10 0.05 0.35
## B_pct_severity 9 10 0.44 0.09 0.43 0.44 0.11 0.30 0.59
## ADI_social_total 10 10 17.50 7.53 20.50 18.00 7.41 5.00 26.00
## ADI_communication_total 11 10 15.60 5.89 16.00 15.75 7.41 6.00 24.00
## ADI_RRB_total 12 10 8.30 1.34 8.00 8.25 1.48 7.00 10.00
## ados_2_SA_CSS 13 10 4.50 2.99 3.50 4.38 3.71 1.00 9.00
## ados_2_RRB_CSS 14 10 3.90 3.78 1.00 3.62 0.00 1.00 9.00
## SRS_tscore 15 6 72.83 10.68 74.00 72.83 6.67 58.00 90.00
## SRS_tscore_self 16 4 59.00 7.12 61.00 59.00 4.45 49.00 65.00
## RBS_total 17 5 18.20 7.66 19.00 18.20 5.93 8.00 29.00
## SSP_total 18 4 146.25 21.41 149.50 146.25 20.02 119.00 167.00
## vabsdscoresc_dss 19 6 81.67 17.78 76.00 81.67 11.86 67.00 115.00
## vabsdscoresd_dss 20 6 69.33 8.50 72.50 69.33 5.93 57.00 79.00
## vabsdscoress_dss 21 6 71.50 8.48 72.00 71.50 5.93 57.00 82.00
## vabsabcabc_standard 22 6 72.33 9.69 71.00 72.33 6.67 59.00 88.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 16.14 -0.50 -1.72 2.08
## meanFD 1.09 2.08 3.01 0.10
## viq_all 50.85 -0.17 -1.52 5.32
## piq_all 50.00 -0.99 0.13 4.71
## fsiq4_all 44.01 -0.24 -1.35 4.60
## A_pct_severity 0.30 0.18 -1.67 0.03
## B_pct_severity 0.30 0.09 -1.50 0.03
## ADI_social_total 21.00 -0.36 -1.66 2.38
## ADI_communication_total 18.00 -0.08 -1.43 1.86
## ADI_RRB_total 3.00 0.26 -1.85 0.42
## ados_2_SA_CSS 8.00 0.21 -1.77 0.95
## ados_2_RRB_CSS 8.00 0.40 -1.95 1.20
## SRS_tscore 32.00 0.20 -1.24 4.36
## SRS_tscore_self 16.00 -0.50 -1.88 3.56
## RBS_total 21.00 0.08 -1.58 3.43
## SSP_total 48.00 -0.24 -2.09 10.70
## vabsdscoresc_dss 48.00 0.94 -0.82 7.26
## vabsdscoresd_dss 22.00 -0.36 -1.80 3.47
## vabsdscoress_dss 25.00 -0.47 -1.15 3.46
## vabsabcabc_standard 29.00 0.27 -1.25 3.96
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 56 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 56 NaN NA NA NaN NA Inf -Inf
## age 3 56 16.27 5.73 14.82 15.94 4.79 7.08 30.28
## meanFD 4 56 0.30 0.52 0.19 0.22 0.13 0.04 3.95
## viq_all 5 55 97.21 17.99 97.35 97.48 18.76 61.00 136.00
## piq_all 6 55 99.94 19.47 102.00 100.41 19.19 61.00 142.00
## fsiq4_all 7 56 98.83 17.79 102.61 99.44 19.37 60.00 131.00
## A_pct_severity 8 56 0.31 0.15 0.32 0.31 0.15 0.00 0.63
## B_pct_severity 9 56 0.32 0.16 0.29 0.31 0.16 0.01 0.69
## ADI_social_total 10 56 17.25 7.05 18.50 17.65 7.41 2.00 27.00
## ADI_communication_total 11 56 14.04 6.14 14.00 14.15 7.41 0.00 26.00
## ADI_RRB_total 12 56 5.55 2.43 5.00 5.54 2.97 0.00 12.00
## ados_2_SA_CSS 13 55 6.20 2.58 6.00 6.27 2.97 1.00 10.00
## ados_2_RRB_CSS 14 55 5.11 2.75 5.00 5.11 2.97 1.00 10.00
## SRS_tscore 15 50 71.96 11.78 74.00 72.20 13.34 47.00 90.00
## SRS_tscore_self 16 30 61.67 11.32 62.50 61.42 12.60 43.00 89.00
## RBS_total 17 49 18.57 14.44 17.00 16.80 13.34 0.00 60.00
## SSP_total 18 31 131.10 29.20 136.00 130.64 29.65 81.00 187.00
## vabsdscoresc_dss 19 55 72.45 18.89 75.00 73.24 13.34 21.00 122.00
## vabsdscoresd_dss 20 54 71.72 17.20 70.50 71.64 12.60 25.00 131.00
## vabsdscoress_dss 21 55 70.04 15.32 73.00 71.33 11.86 20.00 95.00
## vabsabcabc_standard 22 54 70.15 13.39 71.50 70.68 9.64 20.00 101.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.20 0.58 -0.54 0.77
## meanFD 3.91 6.09 39.53 0.07
## viq_all 75.00 -0.16 -0.65 2.43
## piq_all 81.00 -0.25 -0.67 2.62
## fsiq4_all 71.00 -0.32 -0.70 2.38
## A_pct_severity 0.63 -0.04 -0.65 0.02
## B_pct_severity 0.68 0.35 -0.48 0.02
## ADI_social_total 25.00 -0.47 -0.91 0.94
## ADI_communication_total 26.00 -0.18 -0.77 0.82
## ADI_RRB_total 12.00 0.09 -0.19 0.33
## ados_2_SA_CSS 9.00 -0.25 -1.14 0.35
## ados_2_RRB_CSS 9.00 -0.33 -0.97 0.37
## SRS_tscore 43.00 -0.21 -1.08 1.67
## SRS_tscore_self 46.00 0.18 -0.78 2.07
## RBS_total 60.00 1.09 0.61 2.06
## SSP_total 106.00 0.11 -1.04 5.25
## vabsdscoresc_dss 101.00 -0.39 0.82 2.55
## vabsdscoresd_dss 106.00 0.29 2.14 2.34
## vabsdscoress_dss 75.00 -0.93 1.05 2.07
## vabsabcabc_standard 81.00 -0.78 2.70 1.82
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 67 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 67 NaN NA NA NaN NA Inf -Inf
## age 3 67 16.19 5.18 15.91 16.02 5.09 7.56 29.40
## meanFD 4 67 0.24 0.23 0.15 0.19 0.10 0.03 1.08
## viq_all 5 66 98.21 19.44 100.00 97.87 19.75 64.55 142.00
## piq_all 6 66 99.48 22.94 102.49 100.22 21.34 52.43 150.00
## fsiq4_all 7 67 98.93 19.61 102.00 99.37 19.27 59.00 143.00
## A_pct_severity 8 67 0.42 0.14 0.44 0.42 0.13 0.16 0.82
## B_pct_severity 9 67 0.17 0.10 0.15 0.16 0.11 0.00 0.40
## ADI_social_total 10 67 17.55 6.47 18.00 17.85 7.41 3.00 28.00
## ADI_communication_total 11 67 14.19 5.02 15.00 14.35 5.93 2.00 24.00
## ADI_RRB_total 12 67 2.97 1.98 3.00 2.87 1.48 0.00 8.00
## ados_2_SA_CSS 13 65 6.35 2.58 7.00 6.51 2.97 1.00 10.00
## ados_2_RRB_CSS 14 65 4.68 2.81 5.00 4.58 2.97 1.00 10.00
## SRS_tscore 15 58 72.90 12.17 74.00 73.56 13.34 44.00 95.00
## SRS_tscore_self 16 26 63.65 12.35 62.00 62.77 8.90 42.00 94.00
## RBS_total 17 56 15.88 16.27 13.00 13.50 14.83 0.00 90.00
## SSP_total 18 44 140.68 30.53 141.00 142.28 34.84 53.00 189.00
## vabsdscoresc_dss 19 62 71.60 15.81 72.00 72.72 10.38 21.00 104.00
## vabsdscoresd_dss 20 62 71.76 16.40 73.00 71.86 14.83 17.00 112.00
## vabsdscoress_dss 21 62 68.08 16.65 69.00 69.38 14.08 20.00 104.00
## vabsabcabc_standard 22 62 68.29 15.31 70.50 69.32 10.38 6.00 103.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.85 0.31 -0.44 0.63
## meanFD 1.05 2.12 4.27 0.03
## viq_all 77.45 0.13 -0.64 2.39
## piq_all 97.57 -0.28 -0.54 2.82
## fsiq4_all 84.00 -0.23 -0.91 2.40
## A_pct_severity 0.66 0.28 -0.23 0.02
## B_pct_severity 0.40 0.33 -0.78 0.01
## ADI_social_total 25.00 -0.35 -0.84 0.79
## ADI_communication_total 22.00 -0.23 -0.60 0.61
## ADI_RRB_total 8.00 0.58 -0.33 0.24
## ados_2_SA_CSS 9.00 -0.51 -0.87 0.32
## ados_2_RRB_CSS 9.00 -0.17 -1.21 0.35
## SRS_tscore 51.00 -0.36 -0.39 1.60
## SRS_tscore_self 52.00 0.89 0.34 2.42
## RBS_total 90.00 2.03 6.03 2.17
## SSP_total 136.00 -0.51 -0.12 4.60
## vabsdscoresc_dss 83.00 -1.03 2.46 2.01
## vabsdscoresd_dss 95.00 -0.33 1.37 2.08
## vabsdscoress_dss 84.00 -0.77 0.84 2.11
## vabsabcabc_standard 97.00 -1.41 4.41 1.94
## ------------------------------------------------------------
## subgrp: TD
## dataset: Discovery
## vars n mean sd median trimmed mad min
## dataset* 1 121 NaN NA NA NaN NA Inf
## subgrp* 2 121 NaN NA NA NaN NA Inf
## age 3 121 16.83 5.23 16.65 16.73 5.69 7.22
## meanFD 4 121 0.18 0.15 0.13 0.15 0.07 0.03
## viq_all 5 119 104.52 19.70 105.00 105.03 19.27 46.00
## piq_all 6 119 106.10 19.47 107.00 107.53 17.79 49.00
## fsiq4_all 7 119 105.72 18.33 108.18 106.99 16.58 53.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf
## SRS_tscore 15 68 47.84 9.40 45.00 46.32 5.19 37.00
## SRS_tscore_self 16 71 46.69 4.85 46.00 46.26 4.45 39.00
## RBS_total 17 68 2.15 4.74 0.00 0.95 0.00 0.00
## SSP_total 18 59 177.86 12.71 182.00 179.78 8.90 122.00
## vabsdscoresc_dss 19 34 91.97 25.44 99.50 93.50 21.50 21.00
## vabsdscoresd_dss 20 34 90.74 20.28 98.50 92.25 17.05 33.00
## vabsdscoress_dss 21 34 96.21 23.67 102.50 98.57 21.50 33.00
## vabsabcabc_standard 22 34 92.06 23.04 99.50 93.89 15.57 25.00
## max range skew kurtosis se
## dataset* -Inf -Inf NA NA NA
## subgrp* -Inf -Inf NA NA NA
## age 29.84 22.62 0.17 -0.51 0.48
## meanFD 0.85 0.82 2.28 5.65 0.01
## viq_all 160.00 114.00 -0.24 0.38 1.81
## piq_all 147.00 98.00 -0.69 0.32 1.79
## fsiq4_all 142.00 89.00 -0.69 0.58 1.68
## A_pct_severity -Inf -Inf NA NA NA
## B_pct_severity -Inf -Inf NA NA NA
## ADI_social_total -Inf -Inf NA NA NA
## ADI_communication_total -Inf -Inf NA NA NA
## ADI_RRB_total -Inf -Inf NA NA NA
## ados_2_SA_CSS -Inf -Inf NA NA NA
## ados_2_RRB_CSS -Inf -Inf NA NA NA
## SRS_tscore 76.00 39.00 1.54 1.44 1.14
## SRS_tscore_self 63.00 24.00 0.93 1.10 0.58
## RBS_total 27.00 27.00 3.13 10.87 0.57
## SSP_total 190.00 68.00 -1.90 4.85 1.66
## vabsdscoresc_dss 138.00 117.00 -0.71 0.36 4.36
## vabsdscoresd_dss 121.00 88.00 -0.80 0.07 3.48
## vabsdscoress_dss 129.00 96.00 -0.83 -0.31 4.06
## vabsabcabc_standard 127.00 102.00 -0.90 0.30 3.95
## ------------------------------------------------------------
## subgrp: RRB_over_SC
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 3 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 3 NaN NA NA NaN NA Inf -Inf
## age 3 3 14.54 4.39 12.61 14.54 1.72 11.45 19.56
## meanFD 4 3 0.23 0.14 0.20 0.23 0.15 0.10 0.38
## viq_all 5 3 111.00 28.00 99.00 111.00 11.86 91.00 143.00
## piq_all 6 3 119.33 29.54 121.00 119.33 40.03 89.00 148.00
## fsiq4_all 7 3 115.67 28.75 106.00 115.67 19.27 93.00 148.00
## A_pct_severity 8 3 0.20 0.07 0.16 0.20 0.01 0.15 0.27
## B_pct_severity 9 3 0.40 0.05 0.40 0.40 0.08 0.35 0.45
## ADI_social_total 10 3 15.67 2.52 16.00 15.67 2.97 13.00 18.00
## ADI_communication_total 11 3 8.67 2.52 9.00 8.67 2.97 6.00 11.00
## ADI_RRB_total 12 3 5.67 1.53 6.00 5.67 1.48 4.00 7.00
## ados_2_SA_CSS 13 3 4.67 2.52 5.00 4.67 2.97 2.00 7.00
## ados_2_RRB_CSS 14 3 4.33 3.06 5.00 4.33 2.97 1.00 7.00
## SRS_tscore 15 3 68.00 6.93 72.00 68.00 0.00 60.00 72.00
## SRS_tscore_self 16 1 67.00 NA 67.00 67.00 0.00 67.00 67.00
## RBS_total 17 3 12.67 5.51 13.00 12.67 7.41 7.00 18.00
## SSP_total 18 2 146.00 9.90 146.00 146.00 10.38 139.00 153.00
## vabsdscoresc_dss 19 3 82.33 14.43 74.00 82.33 0.00 74.00 99.00
## vabsdscoresd_dss 20 3 69.33 4.16 68.00 69.33 2.97 66.00 74.00
## vabsdscoress_dss 21 3 84.33 9.71 82.00 84.33 8.90 76.00 95.00
## vabsabcabc_standard 22 3 65.67 23.18 77.00 65.67 5.93 39.00 81.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 8.11 0.35 -2.33 2.53
## meanFD 0.28 0.18 -2.33 0.08
## viq_all 52.00 0.35 -2.33 16.17
## piq_all 59.00 -0.06 -2.33 17.05
## fsiq4_all 55.00 0.30 -2.33 16.60
## A_pct_severity 0.12 0.38 -2.33 0.04
## B_pct_severity 0.11 -0.02 -2.33 0.03
## ADI_social_total 5.00 -0.13 -2.33 1.45
## ADI_communication_total 5.00 -0.13 -2.33 1.45
## ADI_RRB_total 3.00 -0.21 -2.33 0.88
## ados_2_SA_CSS 5.00 -0.13 -2.33 1.45
## ados_2_RRB_CSS 6.00 -0.21 -2.33 1.76
## SRS_tscore 12.00 -0.38 -2.33 4.00
## SRS_tscore_self 0.00 NA NA NA
## RBS_total 11.00 -0.06 -2.33 3.18
## SSP_total 14.00 0.00 -2.75 7.00
## vabsdscoresc_dss 25.00 0.38 -2.33 8.33
## vabsdscoresd_dss 8.00 0.29 -2.33 2.40
## vabsdscoress_dss 19.00 0.23 -2.33 5.61
## vabsabcabc_standard 42.00 -0.37 -2.33 13.38
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 62 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 62 NaN NA NA NaN NA Inf -Inf
## age 3 62 16.82 5.94 16.42 16.51 6.13 7.12 30.15
## meanFD 4 62 0.23 0.23 0.18 0.19 0.11 0.05 1.60
## viq_all 5 61 101.74 16.02 102.73 102.15 17.39 70.00 133.00
## piq_all 6 61 103.30 18.99 105.98 104.48 19.31 52.00 134.00
## fsiq4_all 7 61 102.84 17.03 105.00 103.57 17.79 64.00 131.00
## A_pct_severity 8 62 0.25 0.12 0.24 0.25 0.13 0.04 0.65
## B_pct_severity 9 62 0.24 0.13 0.23 0.23 0.11 0.00 0.67
## ADI_social_total 10 62 14.60 6.35 15.50 14.80 6.67 1.00 27.00
## ADI_communication_total 11 62 11.61 5.70 11.00 11.54 5.93 0.00 24.00
## ADI_RRB_total 12 62 4.00 2.32 4.00 3.96 2.22 0.00 9.00
## ados_2_SA_CSS 13 60 5.57 2.51 6.00 5.62 2.97 1.00 10.00
## ados_2_RRB_CSS 14 60 5.07 2.46 6.00 5.08 1.48 1.00 10.00
## SRS_tscore 15 56 65.89 11.23 67.00 65.72 12.60 43.00 90.00
## SRS_tscore_self 16 30 60.97 7.28 61.50 61.12 6.67 46.00 79.00
## RBS_total 17 53 13.47 11.19 11.00 12.23 10.38 0.00 52.00
## SSP_total 18 35 139.77 27.00 142.00 141.76 34.10 69.00 177.00
## vabsdscoresc_dss 19 56 82.18 14.63 80.00 81.50 14.83 50.00 122.00
## vabsdscoresd_dss 20 55 78.85 15.95 77.00 78.20 11.86 38.00 119.00
## vabsdscoress_dss 21 56 76.27 15.44 77.50 77.17 13.34 28.00 101.00
## vabsabcabc_standard 22 55 78.31 12.58 77.00 77.87 8.90 48.00 117.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.03 0.39 -0.66 0.75
## meanFD 1.55 3.88 19.57 0.03
## viq_all 63.00 -0.17 -0.92 2.05
## piq_all 82.00 -0.54 -0.08 2.43
## fsiq4_all 67.00 -0.35 -0.63 2.18
## A_pct_severity 0.61 0.44 0.30 0.02
## B_pct_severity 0.67 0.69 0.92 0.02
## ADI_social_total 26.00 -0.24 -0.83 0.81
## ADI_communication_total 24.00 0.10 -0.71 0.72
## ADI_RRB_total 9.00 0.18 -0.60 0.29
## ados_2_SA_CSS 9.00 -0.21 -0.89 0.32
## ados_2_RRB_CSS 9.00 -0.45 -0.53 0.32
## SRS_tscore 47.00 0.09 -0.83 1.50
## SRS_tscore_self 33.00 0.02 -0.02 1.33
## RBS_total 52.00 1.26 1.86 1.54
## SSP_total 108.00 -0.58 -0.49 4.56
## vabsdscoresc_dss 72.00 0.50 0.04 1.96
## vabsdscoresd_dss 81.00 0.29 0.37 2.15
## vabsdscoress_dss 73.00 -0.77 1.19 2.06
## vabsabcabc_standard 69.00 0.46 0.53 1.70
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 68 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 68 NaN NA NA NaN NA Inf -Inf
## age 3 68 16.10 5.12 15.63 15.96 6.08 7.48 29.23
## meanFD 4 68 0.26 0.28 0.17 0.20 0.11 0.04 1.59
## viq_all 5 65 96.31 19.73 99.00 97.55 20.76 50.91 130.00
## piq_all 6 67 98.83 20.92 104.00 100.28 20.76 44.03 138.00
## fsiq4_all 7 66 98.22 19.41 103.00 99.20 19.46 59.00 139.00
## A_pct_severity 8 68 0.46 0.15 0.46 0.46 0.19 0.19 0.75
## B_pct_severity 9 68 0.20 0.12 0.18 0.19 0.13 0.00 0.47
## ADI_social_total 10 68 18.47 5.69 19.00 18.66 5.93 6.00 29.00
## ADI_communication_total 11 68 14.82 4.71 15.50 14.96 5.19 4.00 24.00
## ADI_RRB_total 12 68 3.81 2.46 3.50 3.66 2.22 0.00 10.00
## ados_2_SA_CSS 13 65 6.18 2.83 6.00 6.28 4.45 1.00 10.00
## ados_2_RRB_CSS 14 65 4.45 2.80 5.00 4.32 2.97 1.00 9.00
## SRS_tscore 15 59 74.73 11.62 78.00 75.53 13.34 48.00 90.00
## SRS_tscore_self 16 32 63.16 10.33 61.50 62.96 8.90 40.00 84.00
## RBS_total 17 59 19.02 15.25 15.00 17.43 13.34 0.00 73.00
## SSP_total 18 46 139.11 25.30 139.50 139.42 28.17 91.00 184.00
## vabsdscoresc_dss 19 62 73.24 14.96 74.50 73.82 11.12 21.00 110.00
## vabsdscoresd_dss 20 62 70.98 15.74 68.00 70.62 15.57 42.00 118.00
## vabsdscoress_dss 21 62 66.32 16.12 67.50 66.42 14.83 23.00 112.00
## vabsabcabc_standard 22 62 68.23 13.79 68.50 68.58 11.12 28.00 107.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.75 0.29 -0.78 0.62
## meanFD 1.54 3.07 10.17 0.03
## viq_all 79.09 -0.54 -0.55 2.45
## piq_all 93.97 -0.58 -0.48 2.56
## fsiq4_all 80.00 -0.44 -0.73 2.39
## A_pct_severity 0.56 -0.01 -1.04 0.02
## B_pct_severity 0.47 0.37 -0.77 0.01
## ADI_social_total 23.00 -0.32 -0.74 0.69
## ADI_communication_total 20.00 -0.28 -0.83 0.57
## ADI_RRB_total 10.00 0.53 -0.53 0.30
## ados_2_SA_CSS 9.00 -0.17 -1.19 0.35
## ados_2_RRB_CSS 8.00 -0.09 -1.37 0.35
## SRS_tscore 42.00 -0.50 -0.78 1.51
## SRS_tscore_self 44.00 0.17 -0.27 1.83
## RBS_total 73.00 1.14 1.08 1.99
## SSP_total 93.00 -0.16 -0.94 3.73
## vabsdscoresc_dss 89.00 -0.70 2.19 1.90
## vabsdscoresd_dss 76.00 0.39 -0.11 2.00
## vabsdscoress_dss 89.00 -0.04 0.39 2.05
## vabsabcabc_standard 79.00 -0.27 1.23 1.75
## ------------------------------------------------------------
## subgrp: TD
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 122 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 122 NaN NA NA NaN NA Inf -Inf
## age 3 122 16.86 6.07 16.34 16.58 7.59 6.89 29.72
## meanFD 4 122 0.23 0.46 0.14 0.15 0.07 0.04 4.60
## viq_all 5 122 104.02 17.58 108.18 105.64 12.13 45.00 140.00
## piq_all 6 122 104.64 18.41 108.96 106.56 14.08 49.00 139.00
## fsiq4_all 7 122 104.94 17.14 108.09 107.29 11.86 50.00 134.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf -Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf -Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf -Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf -Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf -Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf -Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf -Inf
## SRS_tscore 15 65 47.23 9.34 44.00 45.66 4.45 37.00 90.00
## SRS_tscore_self 16 61 48.44 6.84 47.00 47.63 5.93 39.00 69.00
## RBS_total 17 63 3.08 11.54 0.00 0.86 0.00 0.00 89.00
## SSP_total 18 54 174.93 19.38 182.00 178.41 6.67 75.00 190.00
## vabsdscoresc_dss 19 39 92.74 25.45 96.00 95.82 20.76 21.00 125.00
## vabsdscoresd_dss 20 39 91.10 22.65 97.00 93.91 14.83 27.00 122.00
## vabsdscoress_dss 21 39 98.90 27.04 103.00 102.12 17.79 20.00 132.00
## vabsabcabc_standard 22 38 93.00 25.39 100.00 96.44 15.57 20.00 126.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 22.83 0.33 -0.97 0.55
## meanFD 4.56 7.54 64.83 0.04
## viq_all 95.00 -0.99 1.51 1.59
## piq_all 90.00 -0.93 0.67 1.67
## fsiq4_all 84.00 -1.28 1.67 1.55
## A_pct_severity -Inf NA NA NA
## B_pct_severity -Inf NA NA NA
## ADI_social_total -Inf NA NA NA
## ADI_communication_total -Inf NA NA NA
## ADI_RRB_total -Inf NA NA NA
## ados_2_SA_CSS -Inf NA NA NA
## ados_2_RRB_CSS -Inf NA NA NA
## SRS_tscore 53.00 2.14 5.82 1.16
## SRS_tscore_self 30.00 1.05 0.48 0.88
## RBS_total 89.00 6.60 45.89 1.45
## SSP_total 115.00 -2.88 10.92 2.64
## vabsdscoresc_dss 104.00 -1.21 1.00 4.08
## vabsdscoresd_dss 95.00 -1.34 1.26 3.63
## vabsdscoress_dss 112.00 -1.26 1.10 4.33
## vabsabcabc_standard 106.00 -1.42 1.40 4.12
#------------------------------------------------------------------------------
# Cross-tabulation of subtype (rows) by sex (columns)
# Discovery: keep only the rows of data2write flagged as Discovery
data2use <- subset(data2write, data2write$dataset == "Discovery")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 4 6
## SC_equal_RRB 15 41
## SC_over_RRB 16 51
## TD 41 80
# Pearson chi-squared test of independence between subtype and sex (Discovery).
# NOTE(review): the RRB_over_SC row holds very few subjects, so the chi-squared
# approximation may be unreliable here - confirm this is acceptable.
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 2.8193, df = 3, p-value = 0.4203
# Replication: keep only the rows of data2write flagged as Replication
data2use <- subset(data2write, data2write$dataset == "Replication")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 1 2
## SC_equal_RRB 15 47
## SC_over_RRB 20 48
## TD 47 75
# Pearson chi-squared test of independence between subtype and sex (Replication).
# NOTE(review): the RRB_over_SC row holds very few subjects, so the chi-squared
# approximation may be unreliable here - confirm this is acceptable.
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.2615, df = 3, p-value = 0.2346
#------------------------------------------------------------------------------
# Set-up for the per-variable group comparisons.
# vars2analyze: column names of the dependent variables to test.
vars2analyze <- c(
  "age", "meanFD", "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore_self", "RBS_total", "SSP_total",
  "vabsdscoress_dss", "vabsdscoresd_dss", "vabsdscoresc_dss",
  "vabsabcabc_standard"
)
# vnames: axis labels for the plots, in the same order as vars2analyze.
vnames <- c(
  "Age", "Mean FD", "VIQ", "PIQ", "FIQ",
  "ADI-R SC", "ADI-R RRB",
  "ADI-R Social", "ADI-R Communication", "ADI-R RRB",
  "ADOS SA CSS", "ADOS RRB CSS",
  "SRS", "RBS", "SSP",
  "Vineland Socialization", "Vineland Daily Living Skills",
  "Vineland Communication", "Vineland ABC"
)
# cnames: statistics collected for each variable (omnibus tests first, then the
# SC=RRB vs SC>RRB contrast in each dataset, then the replication Bayes factor).
cnames <- c(
  "All_Disc.fstat", "All_Disc.pval",
  "All_Rep.fstat", "All_Rep.pval",
  "SCequalRRB_vs_SCoverRRB_Disc.fstat", "SCequalRRB_vs_SCoverRRB_Disc.tstat",
  "SCequalRRB_vs_SCoverRRB_Disc.pval", "SCequalRRB_vs_SCoverRRB_Disc.es",
  "SCequalRRB_vs_SCoverRRB_Rep.fstat", "SCequalRRB_vs_SCoverRRB_Rep.tstat",
  "SCequalRRB_vs_SCoverRRB_Rep.pval", "SCequalRRB_vs_SCoverRRB_Rep.es",
  "SCequalRRB_vs_SCoverRRB.repBF"
)
# Empty (all-NA) results table: one row per variable, one column per statistic,
# plus a trailing varNames column that repeats the row names.
output_res <- data.frame(
  matrix(nrow = length(vars2analyze), ncol = length(cnames))
)
dimnames(output_res) <- list(vars2analyze, cnames)
output_res[["varNames"]] <- vars2analyze
# Per-variable comparison of the subtypes. For each variable in vars2analyze:
#   1. fit a mixed-effect model (fixed effect: subgrp; random intercept: Centre)
#      on all rows of Discovery, then of Replication, and store the F and p
#      value of the subgrp term;
#   2. refit on SC_equal_RRB vs SC_over_RRB only (TD and RRB_over_SC removed)
#      in each dataset, storing F, p, the t-value of the group coefficient and
#      Cohen's d;
#   3. compute the replication Bayes factor for that two-group contrast;
#   4. draw a jitter + boxplot of the variable by subgroup, faceted by dataset.
# Results are written into the matching row of output_res.
for (ivar in seq_along(vars2analyze)) {
  y_var <- vars2analyze[ivar]
  # print(y_var)

  # Model formulas shared by all four fits below.
  fx_form <- as.formula(sprintf("%s ~ %s", y_var, "subgrp"))
  rx_form <- as.formula(sprintf("~ 1|%s", "Centre"))

  #----------------------------------------------------------------------------
  # Discovery, all subgroups
  df4mod <- subset(df2use, df2use$dataset == "Discovery")
  # mixed-effect model: site (Centre) as random factor, subgrp as fixed factor
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var, "All_Disc.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "All_Disc.pval"] <- res["subgrp", "p-value"]

  #----------------------------------------------------------------------------
  # Replication, all subgroups
  df4mod <- subset(df2use, df2use$dataset == "Replication")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var, "All_Rep.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "All_Rep.pval"] <- res["subgrp", "p-value"]

  #----------------------------------------------------------------------------
  # Discovery, SC_equal_RRB vs SC_over_RRB
  df4mod <- subset(df2use, df2use$dataset == "Discovery" & !is.element(df2use$subgrp, c("TD", "RRB_over_SC")))
  # Group sizes for the Bayes factor: count only the rows the model keeps
  # (na.omit drops rows with a missing outcome, subgrp or Centre), so that the
  # sizes match the t-value they are paired with.
  used <- complete.cases(df4mod[, c(y_var, "subgrp", "Centre")])
  n1 <- sum(df4mod$subgrp[used] == "SC_equal_RRB")
  m1 <- sum(df4mod$subgrp[used] == "SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.pval"] <- res["subgrp", "p-value"]
  # second row of the coefficient table = the subgrp coefficient
  res <- summary(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.tstat"] <- res$tTable[2, "t-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.es"] <- cohens_d(
    df4mod[df4mod$subgrp == "SC_equal_RRB", y_var],
    df4mod[df4mod$subgrp == "SC_over_RRB", y_var]
  )

  #----------------------------------------------------------------------------
  # Replication, SC_equal_RRB vs SC_over_RRB
  df4mod <- subset(df2use, df2use$dataset == "Replication" & !is.element(df2use$subgrp, c("TD", "RRB_over_SC")))
  used <- complete.cases(df4mod[, c(y_var, "subgrp", "Centre")])
  n2 <- sum(df4mod$subgrp[used] == "SC_equal_RRB")
  m2 <- sum(df4mod$subgrp[used] == "SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.pval"] <- res["subgrp", "p-value"]
  res <- summary(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.tstat"] <- res$tTable[2, "t-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.es"] <- cohens_d(
    df4mod[df4mod$subgrp == "SC_equal_RRB", y_var],
    df4mod[df4mod$subgrp == "SC_over_RRB", y_var]
  )

  #----------------------------------------------------------------------------
  # Replication Bayes Factor
  # tobs is the Discovery t-value and trep the Replication t-value. Passing the
  # Discovery value for both would make the result ignore the replication
  # data entirely; any saved repBF values computed that way must be re-run.
  res_bf <- BFSALL(
    tobs = output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.tstat"],
    trep = output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.tstat"],
    n1 = n1,
    n2 = n2,
    m1 = m1,
    m2 = m2,
    sample = 2,
    Type = "ALL"
  )
  output_res[y_var, "SCequalRRB_vs_SCoverRRB.repBF"] <- res_bf["Replication BF", "Replication 1"]

  #----------------------------------------------------------------------------
  # make a plot (RRB_over_SC is left out of the figure)
  colors2use <- get_ggColorHue(3)
  df4plot <- subset(df2use, df2use$subgrp != "RRB_over_SC")
  p <- ggplot(data = df4plot, aes_string(x = "subgrp", y = y_var, colour = "subgrp")) +
    facet_grid(. ~ dataset)
  p <- p + geom_jitter() +
    geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA)
  p <- p + ylab(vnames[ivar]) + xlab("Group") +
    scale_x_discrete(labels = c("SC_equal_RRB" = "SC=RRB", "SC_over_RRB" = "SC>RRB", "TD" = "TD")) +
    scale_colour_manual(values = c(colors2use[2:3], "grey60")) +
    # "none" replaces the deprecated guides(colour = FALSE)
    guides(colour = "none") +
    theme(
      text = element_text(size = fontSize - 5),
      axis.text.x = element_text(size = fontSize - 5),
      axis.text.y = element_text(size = fontSize - 5)
    )
  print(p)
}
# Copy the SC=RRB vs SC>RRB effect sizes for two Vineland scores into the
# "0.7" row of the vabc / vabc_dls tables.
# NOTE(review): "0.7" is presumably the z threshold used for this subtyping -
# confirm against where vabc and vabc_dls are created.
# Vineland ABC standard score
vabc["0.7", "Discovery"] <-
  output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc["0.7", "Replication"] <-
  output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Rep.es"]
# Vineland Daily Living Skills score
vabc_dls["0.7", "Discovery"] <-
  output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc_dls["0.7", "Replication"] <-
  output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Rep.es"]
# Show the full results table
output_res
## All_Disc.fstat All_Disc.pval All_Rep.fstat All_Rep.pval
## age 0.4443736 7.214972e-01 0.57911736 6.292321e-01
## meanFD 2.3454190 7.345813e-02 0.09283242 9.639462e-01
## viq_all 2.0746336 1.041775e-01 1.96069026 1.204982e-01
## piq_all 1.5490396 2.024688e-01 0.92349384 4.300367e-01
## fsiq4_all 2.2140636 8.706621e-02 1.32551779 2.666330e-01
## A_pct_severity 28.5757233 5.661294e-11 55.06317758 0.000000e+00
## B_pct_severity 30.4467534 1.578115e-11 3.65737898 2.855480e-02
## ADI_social_total 0.9981866 3.714198e-01 9.46612207 1.472079e-04
## ADI_communication_total 1.1377968 3.237695e-01 11.08966914 3.639446e-05
## ADI_RRB_total 34.6469967 9.816592e-13 0.44356732 6.427334e-01
## ados_2_SA_CSS 2.1610826 1.195199e-01 1.34800309 2.635994e-01
## ados_2_RRB_CSS 1.1598655 3.169061e-01 0.85013622 4.298725e-01
## SRS_tscore_self 37.7795551 0.000000e+00 32.89311674 1.776357e-15
## RBS_total 18.8124369 1.387963e-10 15.53716896 5.618780e-09
## SSP_total 31.9881267 1.554312e-15 22.88230998 6.086132e-12
## vabsdscoress_dss 22.9718095 2.712497e-12 24.80873326 3.891332e-13
## vabsdscoresd_dss 11.6170742 6.934610e-07 10.55576140 2.398491e-06
## vabsdscoresc_dss 9.4813361 8.952522e-06 8.85284807 1.900485e-05
## vabsabcabc_standard 17.8307895 6.086744e-10 17.49695128 8.412286e-10
## SCequalRRB_vs_SCoverRRB_Disc.fstat
## age 7.735489e-03
## meanFD 8.606177e-01
## viq_all 1.056295e-02
## piq_all 1.459087e-02
## fsiq4_all 9.379364e-04
## A_pct_severity 3.250674e+01
## B_pct_severity 3.873294e+01
## ADI_social_total 1.235755e+00
## ADI_communication_total 1.982731e+00
## ADI_RRB_total 3.720159e+01
## ados_2_SA_CSS 1.443778e-01
## ados_2_RRB_CSS 4.365431e-01
## SRS_tscore_self 2.298479e+00
## RBS_total 2.171167e-01
## SSP_total 1.451255e+00
## vabsdscoress_dss 1.824253e+00
## vabsdscoresd_dss 2.978555e-01
## vabsdscoresc_dss 9.320227e-02
## vabsabcabc_standard 1.294766e+00
## SCequalRRB_vs_SCoverRRB_Disc.tstat
## age -0.08795163
## meanFD -0.92769484
## viq_all 0.10277623
## piq_all -0.12079267
## fsiq4_all 0.03062575
## A_pct_severity 5.70146849
## B_pct_severity -6.22357950
## ADI_social_total 1.11164497
## ADI_communication_total 1.40809488
## ADI_RRB_total -6.09931090
## ados_2_SA_CSS 0.37997078
## ados_2_RRB_CSS -0.66071411
## SRS_tscore_self 1.51607362
## RBS_total -0.46595786
## SSP_total 1.20468026
## vabsdscoress_dss -1.35064915
## vabsdscoresd_dss -0.54576142
## vabsdscoresc_dss -0.30529046
## vabsabcabc_standard -1.13787792
## SCequalRRB_vs_SCoverRRB_Disc.pval
## age 9.300642e-01
## meanFD 3.554596e-01
## viq_all 9.183180e-01
## piq_all 9.040642e-01
## fsiq4_all 9.756197e-01
## A_pct_severity 8.929487e-08
## B_pct_severity 7.669901e-09
## ADI_social_total 2.685506e-01
## ADI_communication_total 1.617322e-01
## ADI_RRB_total 1.388656e-08
## ados_2_SA_CSS 7.046679e-01
## ados_2_RRB_CSS 5.101171e-01
## SRS_tscore_self 1.356750e-01
## RBS_total 6.422590e-01
## SSP_total 2.323840e-01
## vabsdscoress_dss 1.795305e-01
## vabsdscoresd_dss 5.863251e-01
## vabsdscoresc_dss 7.607116e-01
## vabsabcabc_standard 2.576216e-01
## SCequalRRB_vs_SCoverRRB_Disc.es
## age 0.015924500
## meanFD 0.167967896
## viq_all -0.053002168
## piq_all 0.021308962
## fsiq4_all -0.005545139
## A_pct_severity -0.788506161
## B_pct_severity 1.179425752
## ADI_social_total -0.044849376
## ADI_communication_total -0.028505053
## ADI_RRB_total 1.174142890
## ados_2_SA_CSS -0.059546879
## ados_2_RRB_CSS 0.155478951
## SRS_tscore_self -0.167048495
## RBS_total 0.174333540
## SSP_total -0.320171280
## vabsdscoress_dss 0.121774707
## vabsdscoresd_dss -0.002137628
## vabsdscoresc_dss 0.049639893
## vabsabcabc_standard 0.128410012
## SCequalRRB_vs_SCoverRRB_Rep.fstat
## age 5.468116e-01
## meanFD 2.977673e-01
## viq_all 2.006791e+00
## piq_all 7.424804e-01
## fsiq4_all 1.054057e+00
## A_pct_severity 1.037983e+02
## B_pct_severity 2.356628e+00
## ADI_social_total 1.852925e+01
## ADI_communication_total 1.857849e+01
## ADI_RRB_total 1.400011e-03
## ados_2_SA_CSS 1.777220e+00
## ados_2_RRB_CSS 1.668320e+00
## SRS_tscore_self 9.192707e-01
## RBS_total 8.944074e+00
## SSP_total 1.822205e-01
## vabsdscoress_dss 1.613878e+01
## vabsdscoresd_dss 1.028992e+01
## vabsdscoresc_dss 1.072178e+01
## vabsabcabc_standard 2.238903e+01
## SCequalRRB_vs_SCoverRRB_Rep.tstat
## age -0.73946708
## meanFD 0.54568060
## viq_all -1.41661237
## piq_all -0.86167302
## fsiq4_all -1.02667265
## A_pct_severity 10.18814623
## B_pct_severity -1.53513138
## ADI_social_total 4.30456208
## ADI_communication_total 4.31027765
## ADI_RRB_total 0.03741673
## ados_2_SA_CSS 1.33312422
## ados_2_RRB_CSS -1.29163473
## SRS_tscore_self 0.95878608
## RBS_total 2.99066443
## SSP_total -0.42687292
## vabsdscoress_dss -4.01731065
## vabsdscoresd_dss -3.20779037
## vabsdscoresc_dss -3.27441347
## vabsabcabc_standard -4.73170469
## SCequalRRB_vs_SCoverRRB_Rep.pval
## age 4.610099e-01
## meanFD 5.862580e-01
## viq_all 1.591657e-01
## piq_all 3.905442e-01
## fsiq4_all 3.066059e-01
## A_pct_severity 0.000000e+00
## B_pct_severity 1.272789e-01
## ADI_social_total 3.346762e-05
## ADI_communication_total 3.272369e-05
## ADI_RRB_total 9.702124e-01
## ados_2_SA_CSS 1.850164e-01
## ados_2_RRB_CSS 1.989659e-01
## SRS_tscore_self 3.417159e-01
## RBS_total 3.455054e-03
## SSP_total 6.706787e-01
## vabsdscoress_dss 1.063990e-04
## vabsdscoresd_dss 1.744364e-03
## vabsdscoresc_dss 1.405708e-03
## vabsabcabc_standard 6.546637e-06
## SCequalRRB_vs_SCoverRRB_Rep.es
## age 0.12984949
## meanFD -0.09582082
## viq_all 0.30061512
## piq_all 0.22307565
## fsiq4_all 0.25203121
## A_pct_severity -1.55030116
## B_pct_severity 0.36362290
## ADI_social_total -0.64438806
## ADI_communication_total -0.61676272
## ADI_RRB_total 0.07980667
## ados_2_SA_CSS -0.23061201
## ados_2_RRB_CSS 0.23515014
## SRS_tscore_self -0.24311317
## RBS_total -0.41169112
## SSP_total 0.02536933
## vabsdscoress_dss 0.62933627
## vabsdscoresd_dss 0.49686127
## vabsdscoresc_dss 0.60368112
## vabsabcabc_standard 0.76211234
## SCequalRRB_vs_SCoverRRB.repBF varNames
## age 6.992496e-01 age
## meanFD 1.069978e+00 meanFD
## viq_all 6.991060e-01 viq_all
## piq_all 7.014257e-01 piq_all
## fsiq4_all 6.964927e-01 fsiq4_all
## A_pct_severity 1.424045e+06 A_pct_severity
## B_pct_severity 1.631060e+07 B_pct_severity
## ADI_social_total 1.290726e+00 ADI_social_total
## ADI_communication_total 1.869007e+00 ADI_communication_total
## ADI_RRB_total 9.018462e+06 ADI_RRB_total
## ados_2_SA_CSS 7.482395e-01 ados_2_SA_CSS
## ados_2_RRB_CSS 8.654343e-01 ados_2_RRB_CSS
## SRS_tscore_self 2.186721e+00 SRS_tscore_self
## RBS_total 7.758777e-01 RBS_total
## SSP_total 1.435949e+00 SSP_total
## vabsdscoress_dss 1.724381e+00 vabsdscoress_dss
## vabsdscoresd_dss 8.084267e-01 vabsdscoresd_dss
## vabsdscoresc_dss 7.292319e-01 vabsdscoresc_dss
## vabsabcabc_standard 1.326929e+00 vabsabcabc_standard
#------------------------------------------------------------------------------
# Subtype both NDAR datasets on the z-scored difference between the two
# vars2use columns, using this z threshold
z_thresh <- 0.8
# vars2use = c("dbaes_atotal","dbaes_btotal")
# Discovery: difference score, then its mean and sd as the z-scoring norms
ds_disc <- Dverbal_Discovery[[vars2use[1]]] - Dverbal_Discovery[[vars2use[2]]]
mean2use <- mean(ds_disc)
sd2use <- sd(ds_disc)
Dverbal_Discovery <- make_subtype(
  data2use = Dverbal_Discovery,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
# Replication: same procedure, but normed on the Replication mean and sd
ds_rep <- Dverbal_Replication[[vars2use[1]]] - Dverbal_Replication[[vars2use[2]]]
mean2use <- mean(ds_rep)
sd2use <- sd(ds_rep)
Dverbal_Replication <- make_subtype(
  data2use = Dverbal_Replication,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#------------------------------------------------------------------------------
# Cross-tabulation of subtype (rows) by sex (columns)
# NDAR Discovery
table(Dverbal_Discovery[["z_ds_group"]], Dverbal_Discovery[["sex"]])
##
## F M
## RRB_over_SC 35 143
## SC_equal_RRB 125 413
## SC_over_RRB 37 136
# Pearson chi-squared test: is subtype membership independent of sex? (Discovery)
cs_res <- chisq.test(x = Dverbal_Discovery$z_ds_group, y = Dverbal_Discovery$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Discovery$z_ds_group and Dverbal_Discovery$sex
## X-squared = 1.0632, df = 2, p-value = 0.5877
# NDAR Replication: subtype (rows) by sex (columns)
table(Dverbal_Replication[["z_ds_group"]], Dverbal_Replication[["sex"]])
##
## F M
## RRB_over_SC 31 146
## SC_equal_RRB 130 422
## SC_over_RRB 35 126
# Pearson chi-squared test: is subtype membership independent of sex? (Replication)
cs_res <- chisq.test(x = Dverbal_Replication$z_ds_group, y = Dverbal_Replication$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Replication$z_ds_group and Dverbal_Replication$sex
## X-squared = 2.8532, df = 2, p-value = 0.2401
#------------------------------------------------------------------------------
# Descriptive statistics per subtype
# NDAR Discovery
# columns passed to describeBy (z_ds_group is the grouping column)
cols2use <- c(
  "z_ds_group", "sex", "age",
  "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
df2use <- Dverbal_Discovery[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# interview_age / 12 (presumably months -> years; confirm the source unit)
df2use[["age"]] <- df2use[["interview_age"]] / 12
res <- describeBy(df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 178 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 178 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 178 10.05 4.11 9.50 9.76 3.46 2.00 27.17 25.17
## ados_age 4 25 91.60 38.51 86.00 89.52 38.55 37.00 171.00 134.00
## ados_sa_css 5 25 6.44 2.45 7.00 6.52 2.97 2.00 10.00 8.00
## ados_rrb_css 6 25 7.52 2.14 8.00 7.76 1.48 1.00 10.00 9.00
## iq 7 47 101.53 15.74 101.00 102.41 16.31 54.00 139.00 85.00
## dbaes_atotal 8 178 0.22 0.11 0.21 0.21 0.12 0.01 0.48 0.46
## dbaes_btotal 9 178 0.48 0.12 0.48 0.48 0.14 0.25 0.79 0.54
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 0.86 1.27 0.31
## ados_age 0.48 -0.88 7.70
## ados_sa_css -0.26 -1.21 0.49
## ados_rrb_css -1.12 1.22 0.43
## iq -0.55 0.78 2.30
## dbaes_atotal 0.13 -0.79 0.01
## dbaes_btotal 0.22 -0.64 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 538 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 538 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 538 9.06 5.47 8.08 8.30 4.69 0 45.75 45.75 1.99
## ados_age 4 85 82.36 45.77 71.00 77.48 47.44 27 202.00 175.00 0.73
## ados_sa_css 5 85 6.81 2.06 7.00 6.90 1.48 1 10.00 9.00 -0.34
## ados_rrb_css 6 85 7.67 2.30 8.00 8.06 1.48 1 10.00 9.00 -1.59
## iq 7 131 104.88 18.15 107.00 106.22 17.79 42 138.00 96.00 -0.84
## dbaes_atotal 8 538 0.30 0.14 0.30 0.30 0.14 0 0.70 0.70 0.03
## dbaes_btotal 9 538 0.31 0.14 0.31 0.32 0.14 0 0.68 0.68 -0.07
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 6.78 0.24
## ados_age -0.67 4.96
## ados_sa_css -0.26 0.22
## ados_rrb_css 2.36 0.25
## iq 1.08 1.59
## dbaes_atotal -0.28 0.01
## dbaes_btotal -0.31 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 173 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 173 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 173 7.31 5.33 5.75 6.32 2.97 1.67 37.33 35.67
## ados_age 4 43 67.98 32.79 65.00 62.74 22.24 30.00 172.00 142.00
## ados_sa_css 5 43 7.49 1.52 7.00 7.51 1.48 4.00 10.00 6.00
## ados_rrb_css 6 43 8.21 1.73 8.00 8.40 1.48 1.00 10.00 9.00
## iq 7 21 103.29 21.68 111.00 104.71 19.27 40.00 140.00 100.00
## dbaes_atotal 8 173 0.49 0.13 0.48 0.49 0.12 0.20 0.87 0.67
## dbaes_btotal 9 173 0.22 0.10 0.23 0.22 0.10 0.00 0.47 0.47
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.45 7.70 0.40
## ados_age 1.57 2.41 5.00
## ados_sa_css -0.08 -0.68 0.23
## ados_rrb_css -1.67 4.79 0.26
## iq -0.91 1.18 4.73
## dbaes_atotal 0.31 -0.14 0.01
## dbaes_btotal 0.12 -0.30 0.01
# NDAR Replication: descriptive statistics per subtype
# columns passed to describeBy (z_ds_group is the grouping column)
cols2use <- c(
  "z_ds_group", "sex", "age",
  "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
df2use <- Dverbal_Replication[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# interview_age / 12 (presumably months -> years; confirm the source unit)
df2use[["age"]] <- df2use[["interview_age"]] / 12
res <- describeBy(df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 177 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 177 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 177 9.92 4.68 9.33 9.46 3.83 3.00 28.58 25.58
## ados_age 4 14 92.43 52.75 79.00 89.08 58.56 36.00 189.00 153.00
## ados_sa_css 5 14 6.64 2.21 7.00 6.67 2.22 3.00 10.00 7.00
## ados_rrb_css 6 14 6.93 1.44 7.00 6.83 0.74 5.00 10.00 5.00
## iq 7 55 102.98 18.88 104.00 103.76 16.31 57.00 152.00 95.00
## dbaes_atotal 8 177 0.22 0.11 0.21 0.21 0.10 0.01 0.61 0.60
## dbaes_btotal 9 177 0.50 0.13 0.49 0.49 0.11 0.21 0.93 0.72
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 1.16 2.07 0.35
## ados_age 0.38 -1.40 14.10
## ados_sa_css -0.34 -1.18 0.59
## ados_rrb_css 0.40 -0.49 0.38
## iq -0.28 0.65 2.55
## dbaes_atotal 0.80 1.21 0.01
## dbaes_btotal 0.45 0.64 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 552 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 552 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 552 8.84 5.22 7.88 8.09 4.45 0 33.83 33.83 1.64
## ados_age 4 107 80.40 38.24 74.00 76.39 43.00 35 196.00 161.00 0.82
## ados_sa_css 5 107 6.84 2.03 7.00 6.87 1.48 2 10.00 8.00 -0.02
## ados_rrb_css 6 107 7.37 2.37 8.00 7.71 1.48 1 10.00 9.00 -1.31
## iq 7 108 107.71 15.98 108.50 107.65 14.08 64 146.00 82.00 -0.02
## dbaes_atotal 8 552 0.31 0.14 0.31 0.31 0.13 0 0.78 0.78 0.04
## dbaes_btotal 9 552 0.33 0.14 0.32 0.33 0.14 0 0.81 0.81 0.05
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 3.65 0.22
## ados_age 0.02 3.70
## ados_sa_css -0.79 0.20
## ados_rrb_css 1.31 0.23
## iq -0.13 1.54
## dbaes_atotal -0.07 0.01
## dbaes_btotal 0.05 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 161 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 161 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 161 7.95 6.37 6.00 6.65 3.34 2.08 40.92 38.83
## ados_age 4 32 67.81 25.08 62.50 64.73 21.50 30.00 141.00 111.00
## ados_sa_css 5 32 7.31 1.79 7.00 7.35 1.48 3.00 10.00 7.00
## ados_rrb_css 6 32 7.53 2.36 8.00 7.85 1.48 1.00 10.00 9.00
## iq 7 23 108.91 20.06 110.00 109.53 20.76 62.00 146.00 84.00
## dbaes_atotal 8 161 0.50 0.13 0.50 0.50 0.14 0.23 0.96 0.73
## dbaes_btotal 9 161 0.22 0.11 0.22 0.22 0.12 0.00 0.50 0.50
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.32 6.20 0.50
## ados_age 1.14 1.16 4.43
## ados_sa_css -0.16 -0.80 0.32
## ados_rrb_css -1.21 1.16 0.42
## iq -0.35 -0.57 4.18
## dbaes_atotal 0.30 0.18 0.01
## dbaes_btotal 0.11 -0.65 0.01
#------------------------------------------------------------------------------
# Does interview age differ between the subtypes?
# Discovery: one-way linear model, then its ANOVA table
mod2use <- lm(interview_age ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 98234 49117 12.636 3.881e-06 ***
## Residuals 886 3443832 3887
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: interview age by subtype, one-way linear model + ANOVA table
mod2use <- lm(interview_age ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 47475 23737.6 5.7593 0.003273 **
## Residuals 887 3655889 4121.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Does SC (dbaes_atotal) differ between the subtypes?
# Discovery: one-way linear model, then its ANOVA table
mod2use <- lm(dbaes_atotal ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.3441 3.6721 220.92 < 2.2e-16 ***
## Residuals 886 14.7266 0.0166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: SC (dbaes_atotal) by subtype, one-way linear model + ANOVA table
mod2use <- lm(dbaes_atotal ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.1446 3.5723 206.14 < 2.2e-16 ***
## Residuals 887 15.3708 0.0173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Does RRB (dbaes_btotal) differ between the subtypes?
# Discovery: one-way linear model, then its ANOVA table
mod2use <- lm(dbaes_btotal ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.3201 3.16005 199.92 < 2.2e-16 ***
## Residuals 886 14.0047 0.01581
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: RRB (dbaes_btotal) by subtype, one-way linear model + ANOVA table
mod2use <- lm(dbaes_btotal ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.7058 3.3529 190.36 < 2.2e-16 ***
## Residuals 887 15.6228 0.0176
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Does ADOS SA CSS differ between the subtypes?
# Discovery: one-way linear model, then its ANOVA table
mod2use <- lm(ados_sa_css ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 20.58 10.2891 2.5813 0.07903 .
## Residuals 150 597.89 3.9859
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: ADOS SA CSS by subtype, one-way linear model + ANOVA table
mod2use <- lm(ados_sa_css ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.67 3.3352 0.8333 0.4366
## Residuals 150 600.39 4.0026
#------------------------------------------------------------------------------
# Does ADOS RRB CSS differ between the subtypes?
# Discovery: one-way linear model, then its ANOVA table
mod2use <- lm(ados_rrb_css ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 10.59 5.2931 1.1708 0.3129
## Residuals 150 678.13 4.5209
# Replication: ADOS RRB CSS by subtype, one-way linear model + ANOVA table
mod2use <- lm(ados_rrb_css ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 3.56 1.7796 0.3362 0.715
## Residuals 150 793.94 5.2930
#------------------------------------------------------------------------------
# Does IQ differ between the subtypes?
# Discovery: one-way linear model, then its ANOVA table
mod2use <- lm(iq ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 397 198.34 0.6109 0.5439
## Residuals 196 63632 324.65
# Replication: IQ by subtype, one-way linear model + ANOVA table
mod2use <- lm(iq ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 973 486.66 1.6066 0.2034
## Residuals 183 55433 302.91
#------------------------------------------------------------------------------
# Make scatterplots with difference score Z subtypes in different colors
# NOTE(review): maxScores is not referenced by the code in this section; it is
# kept in case later code reads it.
maxScores <- c(3, 4)
# Discovery: SC (x) against RRB (y), one point per subject, coloured by subtype
p_disc <- ggplot(
  data = Dverbal_Discovery,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)
) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR Discovery")
# Legend-free copy used for the saved panel. guides(colour = FALSE) is
# deprecated in ggplot2 (>= 3.3.4); "none" is the supported way to drop a legend.
p1_top_left <- p_disc + guides(colour = "none")
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Disc_scatterplot_z%s.pdf", as.character(z_thresh))),
  plot = p1_top_left
)
p_disc
# subtype sample sizes
table(Dverbal_Discovery$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 178 538 173
# Pearson correlation between the SC and RRB scores
cor_res <- cor.test(Dverbal_Discovery$dbaes_atotal, Dverbal_Discovery$dbaes_btotal)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Discovery$dbaes_atotal and Dverbal_Discovery$dbaes_btotal
## t = 6.6829, df = 887, p-value = 4.134e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1554332 0.2806578
## sample estimates:
## cor
## 0.2189469
# Replication: SC (x) against RRB (y), one point per subject, coloured by subtype
p_rep <- ggplot(
  data = Dverbal_Replication,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)
) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR Replication")
# Legend-free copy used for the saved panel. guides(colour = FALSE) is
# deprecated in ggplot2 (>= 3.3.4); "none" is the supported way to drop a legend.
p2_bottom_left <- p_rep + guides(colour = "none")
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Rep_scatterplot_z%s.pdf", as.character(z_thresh))),
  plot = p2_bottom_left
)
p_rep
# subtype sample sizes
table(Dverbal_Replication$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 177 552 161
# Pearson correlation between the SC and RRB scores
cor_res <- cor.test(Dverbal_Replication$dbaes_atotal, Dverbal_Replication$dbaes_btotal)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Replication$dbaes_atotal and Dverbal_Replication$dbaes_btotal
## t = 7.1459, df = 888, p-value = 1.864e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1700824 0.2943934
## sample estimates:
## cor
## 0.2331903
#------------------------------------------------------------------------------
# Run supervised model with Discovery as training and Replication as Test
# run validation
# "Actual" test labels (tmp_test) use z-scores normed on the test set's own
# mean and sd; "predicted" test labels (pred_labels) use z-scores normed on the
# training set's mean and sd. Accuracy is the agreement between the two.
train_data <- Dverbal_Discovery
test_data <- Dverbal_Replication

# difference-score (first minus second variable in vars2use) norms per set
train_diff <- train_data[, vars2use[1]] - train_data[, vars2use[2]]
test_diff <- test_data[, vars2use[1]] - test_data[, vars2use[2]]
train_mean <- mean(train_diff)
train_sd <- sd(train_diff)
test_mean <- mean(test_diff)
test_sd <- sd(test_diff)

# training-set subtypes from training norms
tmp_train <- make_subtype(
  data2use = train_data,
  z_thresh = z_thresh,
  mean2use = train_mean,
  sd2use = train_sd
)
# actual test-set subtypes from test norms
tmp_test <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = test_mean,
  sd2use = test_sd
)
#==============================================================================
#==============================================================================
# make labels based on train mean and sd
pred_labels <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = train_mean,
  sd2use = train_sd
)
# leave the training norms in mean2use/sd2use, as the original code did
mean2use <- train_mean
sd2use <- train_sd

# Confusion matrix: rows = actual labels, columns = predicted labels.
# Pinning the factor levels keeps the table 3 x 3 (and aligned with the heatmap
# tick labels) even if a subtype happens to be absent from one of the label sets.
subtype_levels <- c("RRB_over_SC", "SC_equal_RRB", "SC_over_RRB")
confmat <- table(
  factor(tmp_test$z_ds_group, levels = subtype_levels),
  factor(pred_labels$z_ds_group, levels = subtype_levels)
)
acc <- sum(diag(confmat)) / nrow(pred_labels)
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#==============================================================================
#==============================================================================
# plot confusion matrix (cell colour = row percentage, cell text = raw count)
# The hook shrinks the plotting viewport so the axis captions drawn afterwards
# fit outside the heatmap.
setHook(
  "grid.newpage",
  function() pushViewport(viewport(x = 1, y = 1, width = 0.9, height = 0.9, name = "vp", just = c("right", "top"))),
  action = "prepend"
)
pheatmap(
  confmat / rowSums(confmat) * 100,
  display_numbers = confmat,
  color = colorRampPalette(c("white", "red"))(100),
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize_number = fontSize,
  fontsize_row = fontSize,
  fontsize_col = fontSize,
  labels_row = c("RRB>SC", "SC=RRB", "SC>RRB"),
  labels_col = c("RRB>SC", "SC=RRB", "SC>RRB"),
  angle_col = 90,
  # pheatmap expects one more break than colours (100 colours -> 101 breaks)
  breaks = seq(0, 100, length.out = 101)
)
setHook("grid.newpage", NULL, "replace")
# Axis captions: the bottom caption describes the columns (predicted labels),
# the rotated left caption describes the rows (actual labels).
grid::grid.text("Predicted Labels", y = -0.07, gp = gpar(fontsize = fontSize))
grid::grid.text("Actual Labels", x = -0.07, rot = 90, gp = gpar(fontsize = fontSize))
# show accuracy
true_accuracy <- acc
true_accuracy
## [1] 0.9786517
#================================================================================
#================================================================================
# #------------------------------------------------------------------------------
# # Permute subtype labels to examine how well supervised model performs
#
# # nperm = 10000
#
# # make subtypes using z-scores computed from the mean and sd of the training set
# train_data = Dverbal_Discovery
# test_data = Dverbal_Replication
# mean2use = mean(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# sd2use = sd(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# tmp_train = make_subtype(data2use = train_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# mean2use = mean(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# sd2use = sd(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# tmp_test = make_subtype(data2use = test_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# acc = vector(length = nperm)
# for (iperm in 1:nperm){
# # set seed for reproducibility
# set.seed(iperm)
#
# sc_perm = sample(train_data[,vars2use[1]])
# rrb_perm = sample(train_data[,vars2use[2]])
# perm_mean2use = mean(sc_perm - rrb_perm)
# perm_sd2use = sd(sc_perm - rrb_perm)
# # perm_mean2use = mean(train_data[,vars2use[1]] - rrb_perm)
# # perm_sd2use = sd(train_data[,vars2use[1]] - rrb_perm)
# pred_labels = make_subtype(data2use = tmp_test,
# z_thresh = z_thresh,
# mean2use = perm_mean2use,
# sd2use = perm_sd2use)
# confmat = table(tmp_test$z_ds_group,pred_labels$z_ds_group)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/dim(pred_labels)[1]
#
# # compute model
# permuted_labels = sample(tmp_train$z_ds_group)
# mod2use = svm(x = tmp_train[,vars2use], y = permuted_labels)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc[iperm] = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
# #============================================================================
# } # for (iperm in 1:nperm)
#
# df2plot = data.frame(Accuracy = acc)
# p = ggplot(data = df2plot, aes(x = Accuracy)) + geom_histogram() + geom_vline(xintercept=true_accuracy)
# p
#
# # compute p-value
# pval = sum(c(true_accuracy,acc)>=true_accuracy)/(nperm+1)
# pval
#================================================================================
#================================================================================
#------------------------------------------------------------------------------
# Plot difference score Z subtypes in Discovery set
# NOTE(review): maxScores is not referenced by the code in this section; it is
# kept in case later code reads it.
maxScores <- c(3, 4)
# Discovery - make plot with all individuals shown as lines
df2use <- Dverbal_Discovery[, c("subjectkey", adi_total_vars2use)]
# subtype labels come from tmp_train (assumed to be row-aligned with
# Dverbal_Discovery - TODO confirm tmp_train was built from it)
df2use$subgrp <- factor(tmp_train$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject x subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)
# one facet per subtype; each subject is a line joining its SC and RRB values.
# guides(colour = "none") replaces the deprecated guides(color = FALSE); the
# legend is dropped once here, so the saved panel is simply the same plot.
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p3_middle_top <- p
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Disc_jitterplot_z%s.pdf", as.character(z_thresh))),
  plot = p3_middle_top
)
p
# Plot difference score Z subtypes in Replication set
# NOTE(review): maxScores is not referenced by the code in this section; it is
# kept in case later code reads it.
maxScores <- c(3, 4)
# Replication - make plot with all individuals shown as lines
df2use <- Dverbal_Replication[, c("subjectkey", adi_total_vars2use)]
# subtype labels come from tmp_test (assumed to be row-aligned with
# Dverbal_Replication - TODO confirm tmp_test was built from it)
df2use$subgrp <- factor(tmp_test$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per subject x subscale
df4plot <- melt(
  df2use,
  id.vars = c("subjectkey", "subgrp"),
  measure.vars = c("SC", "RRB")
)
# one facet per subtype; each subject is a line joining its SC and RRB values.
# guides(colour = "none") replaces the deprecated guides(color = FALSE); the
# legend is dropped once here, so the saved panel is simply the same plot.
p <- ggplot(
  data = df4plot,
  aes(x = variable, y = value, colour = subgrp, group = subjectkey)
) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p4_middle_bottom <- p
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Rep_jitterplot_z%s.pdf", as.character(z_thresh))),
  plot = p4_middle_bottom
)
p
#------------------------------------------------------------------------------
# Apply NDAR subtypes to EU-AIMS LEAP data
# The NDAR scores in vars2use are used exactly as read from tidy_verbal.csv (no
# rescaling is applied here). The EU-AIMS scores are built below as the mean of
# the A1-A3 and of the B1-B4 percent-severity items.
Dverbal <- read.csv(file.path(datapath, "tidy_verbal.csv"))
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))
# keep ASD participants for whom all seven percent-severity items are present
pct_items <- c(
  "A1_pct_severity", "A2_pct_severity", "A3_pct_severity",
  "B1_pct_severity", "B2_pct_severity", "B3_pct_severity", "B4_pct_severity"
)
mask1 <- euaims_data$Diagnosis == "ASD"
mask2 <- !complete.cases(euaims_data[, pct_items])
euaims_data <- subset(euaims_data, (mask1 & !mask2))
# EU-AIMS domain scores, stored under the same column names as the NDAR data
euaims_data[, vars2use[1]] <- (euaims_data$A1_pct_severity +
  euaims_data$A2_pct_severity +
  euaims_data$A3_pct_severity) / 3
euaims_data[, vars2use[2]] <- (euaims_data$B1_pct_severity +
  euaims_data$B2_pct_severity +
  euaims_data$B3_pct_severity +
  euaims_data$B4_pct_severity) / 4
train_data <- Dverbal
test_data <- euaims_data
# difference-score norms from the full NDAR sample, ignoring missing values
train_diff <- train_data[, vars2use[1]] - train_data[, vars2use[2]]
mean2use <- mean(train_diff, na.rm = TRUE)
sd2use <- sd(train_diff, na.rm = TRUE)
c(mean2use, sd2use)
## [1] -0.01045243 0.19482749
# both samples are subtyped with the NDAR norms
tmp_train <- make_subtype(
  data2use = train_data,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
tmp_test <- make_subtype(
  data2use = test_data,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#===========================================================================
#===========================================================================
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#
# tmp_test$svm_pred_labels = pred_labels
#
# # plot confusion matrix
# setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
# pheatmap(confmat/rowSums(confmat)*100, display_numbers = confmat, color = colorRampPalette(c('white','red'))(100), cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = fontSize, fontsize_row = fontSize, fontsize_col = fontSize,labels_row = c("RRB>SC","SC=RRB","SC>RRB"),labels_col = c("RRB>SC","SC=RRB","SC>RRB"),angle_col=90,
# breaks= seq(0,100, length=100))
# setHook("grid.newpage", NULL, "replace")
# grid::grid.text("Actual Labels", y=-0.07, gp=gpar(fontsize=fontSize))
# grid::grid.text("Predicted Labels", x=-0.07, rot=90, gp=gpar(fontsize=fontSize))
#===========================================================================
#===========================================================================
# scatterplots of the two domain scores, coloured by subtype:
# p1 = NDAR sample (tmp_train), p2 = EU-AIMS sample (tmp_test)
p1 <- ggplot(
  data = tmp_train,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  labs(
    x = "Social-Communication",
    y = "Restricted Repetitive Behaviors",
    title = "NDAR ALL"
  ) +
  ylim(0, 0.8) +
  xlim(0, 0.8)
p1
p2 <- ggplot(
  data = tmp_test,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  labs(
    x = "Social-Communication",
    y = "Restricted Repetitive Behaviors",
    title = "EU-AIMS with Groups from NDAR ALL"
  ) +
  ylim(0, 0.8) +
  xlim(0, 0.8)
p2
#===========================================================================
#===========================================================================
# p3 = ggplot(data = tmp_test, aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(pred_labels))) + geom_point() + xlab("SC") + ylab("RRB") + ylim(0,0.8) + xlim(0,0.8) + ggtitle("EU-AIMS")
# p5_bottom_right = p3 + guides(colour=FALSE)
# ggsave(filename = file.path(plotpath, sprintf("final_EUAIMS_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p5_bottom_right)
# p3
#===========================================================================
#===========================================================================
# # write out EU-AIMS LEAP data with subgroups defined by NDAR All
# save the subtyped EU-AIMS table (tmp_test) under datapath; the z threshold is
# embedded in the file name
write.csv(
  tmp_test,
  file = file.path(
    datapath,
    sprintf("tidy_euaims_NDAR_subtypes_diffscore_z%s.csv", as.character(z_thresh))
  )
)
#===========================================================================
#===========================================================================
# # Make final plot
# p_final = p1_top_left + p3_middle_top + plot_spacer() + p2_bottom_left + p4_middle_bottom + p5_bottom_right + plot_layout(nrow=3, ncol=3, widths = c(4,4,4), heights = c(8,8,8))
# ggsave(filename = file.path(plotpath, sprintf("final_NDAR_EUAIMS_subtypes_plot_z%s.pdf",as.character(z_thresh))), plot = p_final)
# p_final
#===========================================================================
#===========================================================================
#------------------------------------------------------------------------------
# Integrate subtypes with rest of EU-AIMS LEAP data
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))
# TD participants: keep columns 2-6 and add empty severity columns plus a "TD"
# subgroup label so the frame can be row-bound with the ASD frame below.
# NOTE(review): columns 2:6 are presumably subid, age, Centre, Schedule and
# Diagnosis (the rbind below needs matching names) - confirm against the CSV.
td_df <- subset(euaims_data, euaims_data$Diagnosis == "TD", select = 2:6)
td_df$A_pct_severity <- NA_real_
td_df$B_pct_severity <- NA_real_
td_df$subgrp <- "TD"
#===========================================================================
#===========================================================================
# cols2use = c("subid","age","Centre","Schedule","Diagnosis","dbaes_atotal","dbaes_btotal","svm_pred_labels")
cols2use <- c("subid", "age", "Centre", "Schedule", "Diagnosis", "dbaes_atotal", "dbaes_btotal", "z_ds_group")
#===========================================================================
#===========================================================================
# ASD participants: subtyped EU-AIMS rows, with the score and subtype columns
# renamed to match td_df
tmp_asd <- tmp_test[, cols2use]
colnames(tmp_asd)[6:8] <- c("A_pct_severity", "B_pct_severity", "subgrp")
asd_df <- tmp_asd
# NOTE(review): absolute, user-specific path; this only runs on a machine with
# this exact Dropbox layout. Consider moving the file under the project and
# building the path with file.path()/here().
fname <- "/Users/mlombardo/Dropbox/euaims/data/rsfmri_preproc/euaims_preproc.xlsx"
pp_data <- read_excel(fname)
# keep only the rows whose notes column is "ok"
mask <- pp_data$notes == "ok"
pp_data <- subset(pp_data, mask)
# restrict both groups to subjects present in pp_data and attach sex
asd_df <- merge(pp_data[, c("subid", "sex")], asd_df, by = "subid")
td_df <- merge(pp_data[, c("subid", "sex")], td_df, by = "subid")
# (the earlier rbind of the un-merged frames was overwritten here before being
# used, so it has been dropped)
all_data <- rbind(td_df, asd_df)
data2write <- merge(pp_data, all_data, by = "subid")
# age and sex exist on both sides of the merge; keep pp_data's age (.x) and
# all_data's sex (.y) under the plain names
data2write$age <- data2write$age.x
data2write$sex <- data2write$sex.y
print(table(data2write$Schedule, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## A 6 39 35 78
## B 1 47 37 83
## C 4 33 26 59
## D 0 13 25 23
print(table(data2write$Centre, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## CAMBRIDGE 4 29 9 29
## KINGS_COLLEGE 6 51 49 78
## MANNHEIM 0 0 0 34
## NIJMEGEN 1 41 51 64
## UTRECHT 0 11 14 38
print(table(data2write$sex, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Female 5 33 33 88
## Male 6 99 90 155
# DSM-5 - find best split that balances participants across sites
# Only seed 172342 is evaluated here (a wider range, 300001:500000, was
# previously left commented out in this call).
a <- findBestSplit(asd_df, seed_range = 172342)
## [1] 172342
# seeds whose (non-missing) discrepancy equals the minimum discrepancy
best_seeds <- a$seed[which(a$discrepancy == min(a$discrepancy, na.rm = TRUE))]
print(best_seeds)
## [1] 172342
# Split datasets -------------------------------------------------------------
# Each schedule (A-D) is split separately with the same seed. Element 1 of the
# list returned by SplitDatasetsBySex is used as Replication, element 2 as
# Discovery.
rngSeed <- best_seeds[1]

# Schedule A
dset2use <- subset(asd_df, Schedule == "A")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
A_Replication <- tmp_d[[1]]
A_Discovery <- tmp_d[[2]]

# Schedule B
dset2use <- subset(asd_df, Schedule == "B")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
B_Replication <- tmp_d[[1]]
B_Discovery <- tmp_d[[2]]

# Schedule C
dset2use <- subset(asd_df, Schedule == "C")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
C_Replication <- tmp_d[[1]]
C_Discovery <- tmp_d[[2]]

# Schedule D
dset2use <- subset(asd_df, Schedule == "D")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
D_Replication <- tmp_d[[1]]
D_Discovery <- tmp_d[[2]]

# stack the per-schedule halves
df_Rep <- rbind(A_Replication, B_Replication, C_Replication, D_Replication)
df_Disc <- rbind(A_Discovery, B_Discovery, C_Discovery, D_Discovery)
# Schedule x Centre counts in the Discovery half
a <- table(df_Disc$Schedule, df_Disc$Centre)
print(a)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 6 17 10 7
## B 9 16 14 3
## C 6 9 14 2
## D 0 10 10 0
# Schedule x Centre counts in the Replication half
b <- table(df_Rep$Schedule, df_Rep$Centre)
print(b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 7 18 10 5
## B 8 17 13 5
## C 6 10 13 3
## D 0 9 9 0
# cell-wise difference between the two halves
print(a - b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A -1 -1 0 2
## B 1 -1 1 -2
## C 0 -1 1 -1
## D 0 1 1 0
# total absolute discrepancy over all cells
print(sum(abs(a - b)))
## [1] 14
# Tag each row of data2write with the half its subid was assigned to; rows in
# neither half stay NA
data2write$dataset <- NA
mask <- data2write$subid %in% df_Disc$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% df_Rep$subid
data2write$dataset[mask] <- "Replication"
# ASD rows of each half
asd_Disc <- subset(data2write, dataset == "Discovery" & Diagnosis == "ASD")
asd_Rep <- subset(data2write, dataset == "Replication" & Diagnosis == "ASD")
# # find which seed gives best TD age-match -------------------------------------
# seeds = 1:1000
# pvals = data.frame(matrix(nrow = length(seeds), ncol = 2))
# for (i in 1:length(seeds)) {
# res = findTDAgeMatch(data2write, seed_range = c(seeds[i],seeds[i]))
# td_Disc_matched = res[[2]]
# td_Rep_matched = res[[1]]
# tres = t.test(td_Disc_matched$age, asd_Disc$age)
# pvals[i,1] = tres$p.value
# tres = t.test(td_Rep_matched$age, asd_Rep$age)
# pvals[i,2] = tres$p.value
# #print(i)
# }
# a = sort.int(pvals[,1], decreasing = TRUE, index.return = TRUE)
# b=pvals[a$ix,]
# TD age matching with a single fixed seed (passed as both ends of seed_range).
# Element 1 of the result is used as the Replication TD set, element 2 as the
# Discovery TD set.
seed2use <- 929
res <- findTDAgeMatch(data2write, seed_range = rep(seed2use, 2))
td_Rep_matched <- res[[1]]
td_Disc_matched <- res[[2]]
# tag the matched TD subjects with their half
mask <- data2write$subid %in% td_Disc_matched$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% td_Rep_matched$subid
data2write$dataset[mask] <- "Replication"
print(table(data2write$dataset, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Discovery 8 65 60 121
## Replication 3 67 63 122
# write the combined table to the project root; the z threshold is embedded in
# the file name
fname2save <- here(
  sprintf("asd_subgrp_data_rsfmri_ALL_DSM5_diffzscoreGrps_z%s.csv", as.character(z_thresh))
)
write.csv(data2write, file = fname2save)
#------------------------------------------------------------------------------
# Descriptive stats
# columns shared by the working table and by the describeBy() call
measure_cols <- c(
  "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore", "SRS_tscore_self", "RBS_total", "SSP_total",
  "vabsdscoresc_dss", "vabsdscoresd_dss", "vabsdscoress_dss",
  "vabsabcabc_standard"
)
df2use <- data2write[, c(
  "subid", "Diagnosis", "dataset", "Centre", "meanFD", "age", "sex", "subgrp",
  measure_cols
)]
# cells equal to 999 or 777 are set to NA
# (presumably missing-data codes - TODO confirm)
mask <- df2use == 999 | df2use == 777
df2use[mask] <- NA
# age is divided by 365 (presumably days -> years - TODO confirm)
df2use$age <- df2use$age / 365
# describe every listed column within each subgrp x dataset cell; the grouping
# columns are part of x because they are passed to describeBy() by name
cols2use <- c("dataset", "subgrp", "age", "meanFD", measure_cols)
res <- describeBy(x = df2use[, cols2use], group = c("subgrp", "dataset"))
res
##
## Descriptive statistics by group
## subgrp: RRB_over_SC
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 8 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 8 NaN NA NA NaN NA Inf -Inf
## age 3 8 17.53 7.05 21.73 17.53 2.56 7.89 23.88
## meanFD 4 8 0.27 0.36 0.18 0.27 0.07 0.06 1.14
## viq_all 5 8 103.86 13.94 107.50 103.86 12.60 78.00 123.85
## piq_all 6 8 101.37 9.62 99.50 101.37 9.64 87.00 114.00
## fsiq4_all 7 8 102.75 12.37 103.00 102.75 10.38 80.00 118.01
## A_pct_severity 8 8 0.20 0.11 0.19 0.20 0.13 0.05 0.35
## B_pct_severity 9 8 0.47 0.08 0.47 0.47 0.09 0.36 0.59
## ADI_social_total 10 8 17.50 7.80 20.50 17.50 7.41 5.00 26.00
## ADI_communication_total 11 8 16.25 6.30 17.00 16.25 7.41 6.00 24.00
## ADI_RRB_total 12 8 8.50 1.41 8.50 8.50 2.22 7.00 10.00
## ados_2_SA_CSS 13 8 3.75 2.87 3.00 3.75 2.22 1.00 9.00
## ados_2_RRB_CSS 14 8 3.88 3.98 1.00 3.88 0.00 1.00 9.00
## SRS_tscore 15 4 72.25 13.72 70.50 72.25 12.60 58.00 90.00
## SRS_tscore_self 16 4 59.00 7.12 61.00 59.00 4.45 49.00 65.00
## RBS_total 17 4 18.00 8.83 17.50 18.00 8.90 8.00 29.00
## SSP_total 18 3 142.00 24.06 140.00 142.00 31.13 119.00 167.00
## vabsdscoresc_dss 19 5 84.20 18.63 77.00 84.20 14.83 67.00 115.00
## vabsdscoresd_dss 20 5 69.00 9.46 74.00 69.00 7.41 57.00 79.00
## vabsdscoress_dss 21 5 70.40 8.99 71.00 70.40 2.97 57.00 82.00
## vabsabcabc_standard 22 5 72.80 10.76 72.00 72.80 7.41 59.00 88.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 16.00 -0.41 -1.98 2.49
## meanFD 1.09 1.77 1.50 0.13
## viq_all 45.85 -0.43 -0.93 4.93
## piq_all 27.00 0.09 -1.56 3.40
## fsiq4_all 38.01 -0.40 -1.08 4.37
## A_pct_severity 0.30 0.01 -1.94 0.04
## B_pct_severity 0.23 0.05 -1.67 0.03
## ADI_social_total 21.00 -0.41 -1.67 2.76
## ADI_communication_total 18.00 -0.26 -1.52 2.23
## ADI_RRB_total 3.00 0.00 -2.05 0.50
## ados_2_SA_CSS 8.00 0.70 -1.16 1.01
## ados_2_RRB_CSS 8.00 0.44 -2.00 1.41
## SRS_tscore 32.00 0.24 -2.00 6.86
## SRS_tscore_self 16.00 -0.50 -1.88 3.56
## RBS_total 21.00 0.11 -1.98 4.42
## SSP_total 48.00 0.08 -2.33 13.89
## vabsdscoresc_dss 48.00 0.71 -1.35 8.33
## vabsdscoresd_dss 22.00 -0.23 -2.10 4.23
## vabsdscoress_dss 25.00 -0.23 -1.46 4.02
## vabsabcabc_standard 29.00 0.14 -1.65 4.81
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 65 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 65 NaN NA NA NaN NA Inf -Inf
## age 3 65 16.05 5.71 14.85 15.76 4.76 7.08 30.28
## meanFD 4 65 0.30 0.49 0.19 0.22 0.13 0.04 3.95
## viq_all 5 64 96.39 18.31 96.17 96.58 22.24 61.00 136.00
## piq_all 6 64 99.34 20.61 102.00 99.62 19.94 58.00 145.00
## fsiq4_all 7 65 98.03 18.10 102.00 98.61 19.27 60.00 131.00
## A_pct_severity 8 65 0.31 0.14 0.31 0.31 0.15 0.00 0.63
## B_pct_severity 9 65 0.30 0.15 0.29 0.30 0.16 0.01 0.69
## ADI_social_total 10 65 17.00 6.94 18.00 17.36 7.41 2.00 27.00
## ADI_communication_total 11 65 13.66 6.03 14.00 13.70 7.41 0.00 26.00
## ADI_RRB_total 12 65 5.31 2.57 5.00 5.34 2.97 0.00 12.00
## ados_2_SA_CSS 13 64 6.44 2.51 7.00 6.54 2.97 1.00 10.00
## ados_2_RRB_CSS 14 64 5.09 2.79 5.50 5.10 2.22 1.00 10.00
## SRS_tscore 15 57 71.93 12.19 74.00 72.32 13.34 45.00 90.00
## SRS_tscore_self 16 31 61.26 11.36 61.00 60.92 14.83 43.00 89.00
## RBS_total 17 55 18.24 14.19 17.00 16.51 13.34 0.00 60.00
## SSP_total 18 36 134.92 29.75 136.50 134.90 33.36 81.00 189.00
## vabsdscoresc_dss 19 63 72.92 18.48 75.00 73.57 13.34 21.00 122.00
## vabsdscoresd_dss 20 62 72.68 16.92 71.00 72.48 11.86 25.00 131.00
## vabsdscoress_dss 21 63 71.03 15.63 73.00 72.14 13.34 20.00 104.00
## vabsabcabc_standard 22 62 70.87 13.57 72.00 71.20 10.38 20.00 103.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.20 0.51 -0.51 0.71
## meanFD 3.91 6.50 45.73 0.06
## viq_all 75.00 -0.12 -0.84 2.29
## piq_all 87.00 -0.16 -0.61 2.58
## fsiq4_all 71.00 -0.29 -0.84 2.24
## A_pct_severity 0.63 0.02 -0.64 0.02
## B_pct_severity 0.68 0.40 -0.33 0.02
## ADI_social_total 25.00 -0.43 -0.93 0.86
## ADI_communication_total 26.00 -0.07 -0.81 0.75
## ADI_RRB_total 12.00 -0.02 -0.38 0.32
## ados_2_SA_CSS 9.00 -0.40 -0.97 0.31
## ados_2_RRB_CSS 9.00 -0.33 -1.05 0.35
## SRS_tscore 45.00 -0.25 -0.94 1.62
## SRS_tscore_self 46.00 0.23 -0.80 2.04
## RBS_total 60.00 1.04 0.66 1.91
## SSP_total 108.00 -0.01 -1.05 4.96
## vabsdscoresc_dss 101.00 -0.36 0.84 2.33
## vabsdscoresd_dss 106.00 0.23 1.98 2.15
## vabsdscoress_dss 84.00 -0.77 0.94 1.97
## vabsabcabc_standard 83.00 -0.59 2.44 1.72
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 60 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 60 NaN NA NA NaN NA Inf -Inf
## age 3 60 16.54 5.11 16.03 16.34 5.10 7.78 29.40
## meanFD 4 60 0.23 0.23 0.15 0.17 0.10 0.03 1.08
## viq_all 5 59 98.53 19.34 100.00 98.07 19.27 64.55 142.00
## piq_all 6 59 99.61 22.46 103.00 100.53 20.76 52.43 150.00
## fsiq4_all 7 60 99.23 19.52 101.97 99.71 19.60 59.00 143.00
## A_pct_severity 8 60 0.44 0.14 0.44 0.43 0.13 0.19 0.82
## B_pct_severity 9 60 0.16 0.10 0.15 0.16 0.11 0.00 0.40
## ADI_social_total 10 60 17.87 6.48 18.00 18.21 8.90 3.00 28.00
## ADI_communication_total 11 60 14.58 4.88 15.00 14.75 5.19 2.00 24.00
## ADI_RRB_total 12 60 3.00 1.95 3.00 2.88 1.48 0.00 8.00
## ados_2_SA_CSS 13 58 6.16 2.62 7.00 6.27 2.97 1.00 10.00
## ados_2_RRB_CSS 14 58 4.60 2.78 5.00 4.50 2.97 1.00 10.00
## SRS_tscore 15 53 73.09 11.53 74.00 73.58 10.38 44.00 95.00
## SRS_tscore_self 16 25 64.24 12.23 63.00 63.24 8.90 42.00 94.00
## RBS_total 17 51 15.98 16.59 12.00 13.41 13.34 0.00 90.00
## SSP_total 18 40 138.90 30.62 140.00 140.59 37.06 53.00 186.00
## vabsdscoresc_dss 19 55 70.89 15.67 72.00 72.38 8.90 21.00 103.00
## vabsdscoresd_dss 20 55 70.67 16.39 71.00 70.84 11.86 17.00 112.00
## vabsdscoress_dss 21 55 66.82 16.15 68.00 68.44 14.83 20.00 95.00
## vabsabcabc_standard 22 55 67.24 15.08 70.00 68.62 10.38 6.00 96.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.62 0.37 -0.47 0.66
## meanFD 1.05 2.21 4.38 0.03
## viq_all 77.45 0.18 -0.55 2.52
## piq_all 97.57 -0.38 -0.51 2.92
## fsiq4_all 84.00 -0.20 -0.87 2.52
## A_pct_severity 0.63 0.26 -0.23 0.02
## B_pct_severity 0.40 0.33 -0.80 0.01
## ADI_social_total 25.00 -0.36 -0.86 0.84
## ADI_communication_total 22.00 -0.34 -0.36 0.63
## ADI_RRB_total 8.00 0.61 -0.15 0.25
## ados_2_SA_CSS 9.00 -0.41 -1.01 0.34
## ados_2_RRB_CSS 9.00 -0.14 -1.18 0.36
## SRS_tscore 51.00 -0.34 -0.29 1.58
## SRS_tscore_self 52.00 0.89 0.35 2.45
## RBS_total 90.00 2.09 6.08 2.32
## SSP_total 133.00 -0.50 -0.17 4.84
## vabsdscoresc_dss 82.00 -1.23 2.74 2.11
## vabsdscoresd_dss 95.00 -0.34 1.48 2.21
## vabsdscoress_dss 75.00 -0.98 0.93 2.18
## vabsabcabc_standard 90.00 -1.69 4.81 2.03
## ------------------------------------------------------------
## subgrp: TD
## dataset: Discovery
## vars n mean sd median trimmed mad min
## dataset* 1 121 NaN NA NA NaN NA Inf
## subgrp* 2 121 NaN NA NA NaN NA Inf
## age 3 121 16.83 5.23 16.65 16.73 5.69 7.22
## meanFD 4 121 0.18 0.15 0.13 0.15 0.07 0.03
## viq_all 5 119 104.52 19.70 105.00 105.03 19.27 46.00
## piq_all 6 119 106.10 19.47 107.00 107.53 17.79 49.00
## fsiq4_all 7 119 105.72 18.33 108.18 106.99 16.58 53.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf
## SRS_tscore 15 68 47.84 9.40 45.00 46.32 5.19 37.00
## SRS_tscore_self 16 71 46.69 4.85 46.00 46.26 4.45 39.00
## RBS_total 17 68 2.15 4.74 0.00 0.95 0.00 0.00
## SSP_total 18 59 177.86 12.71 182.00 179.78 8.90 122.00
## vabsdscoresc_dss 19 34 91.97 25.44 99.50 93.50 21.50 21.00
## vabsdscoresd_dss 20 34 90.74 20.28 98.50 92.25 17.05 33.00
## vabsdscoress_dss 21 34 96.21 23.67 102.50 98.57 21.50 33.00
## vabsabcabc_standard 22 34 92.06 23.04 99.50 93.89 15.57 25.00
## max range skew kurtosis se
## dataset* -Inf -Inf NA NA NA
## subgrp* -Inf -Inf NA NA NA
## age 29.84 22.62 0.17 -0.51 0.48
## meanFD 0.85 0.82 2.28 5.65 0.01
## viq_all 160.00 114.00 -0.24 0.38 1.81
## piq_all 147.00 98.00 -0.69 0.32 1.79
## fsiq4_all 142.00 89.00 -0.69 0.58 1.68
## A_pct_severity -Inf -Inf NA NA NA
## B_pct_severity -Inf -Inf NA NA NA
## ADI_social_total -Inf -Inf NA NA NA
## ADI_communication_total -Inf -Inf NA NA NA
## ADI_RRB_total -Inf -Inf NA NA NA
## ados_2_SA_CSS -Inf -Inf NA NA NA
## ados_2_RRB_CSS -Inf -Inf NA NA NA
## SRS_tscore 76.00 39.00 1.54 1.44 1.14
## SRS_tscore_self 63.00 24.00 0.93 1.10 0.58
## RBS_total 27.00 27.00 3.13 10.87 0.57
## SSP_total 190.00 68.00 -1.90 4.85 1.66
## vabsdscoresc_dss 138.00 117.00 -0.71 0.36 4.36
## vabsdscoresd_dss 121.00 88.00 -0.80 0.07 3.48
## vabsdscoress_dss 129.00 96.00 -0.83 -0.31 4.06
## vabsabcabc_standard 127.00 102.00 -0.90 0.30 3.95
## ------------------------------------------------------------
## subgrp: RRB_over_SC
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 3 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 3 NaN NA NA NaN NA Inf -Inf
## age 3 3 14.54 4.39 12.61 14.54 1.72 11.45 19.56
## meanFD 4 3 0.23 0.14 0.20 0.23 0.15 0.10 0.38
## viq_all 5 3 111.00 28.00 99.00 111.00 11.86 91.00 143.00
## piq_all 6 3 119.33 29.54 121.00 119.33 40.03 89.00 148.00
## fsiq4_all 7 3 115.67 28.75 106.00 115.67 19.27 93.00 148.00
## A_pct_severity 8 3 0.20 0.07 0.16 0.20 0.01 0.15 0.27
## B_pct_severity 9 3 0.40 0.05 0.40 0.40 0.08 0.35 0.45
## ADI_social_total 10 3 15.67 2.52 16.00 15.67 2.97 13.00 18.00
## ADI_communication_total 11 3 8.67 2.52 9.00 8.67 2.97 6.00 11.00
## ADI_RRB_total 12 3 5.67 1.53 6.00 5.67 1.48 4.00 7.00
## ados_2_SA_CSS 13 3 4.67 2.52 5.00 4.67 2.97 2.00 7.00
## ados_2_RRB_CSS 14 3 4.33 3.06 5.00 4.33 2.97 1.00 7.00
## SRS_tscore 15 3 68.00 6.93 72.00 68.00 0.00 60.00 72.00
## SRS_tscore_self 16 1 67.00 NA 67.00 67.00 0.00 67.00 67.00
## RBS_total 17 3 12.67 5.51 13.00 12.67 7.41 7.00 18.00
## SSP_total 18 2 146.00 9.90 146.00 146.00 10.38 139.00 153.00
## vabsdscoresc_dss 19 3 82.33 14.43 74.00 82.33 0.00 74.00 99.00
## vabsdscoresd_dss 20 3 69.33 4.16 68.00 69.33 2.97 66.00 74.00
## vabsdscoress_dss 21 3 84.33 9.71 82.00 84.33 8.90 76.00 95.00
## vabsabcabc_standard 22 3 65.67 23.18 77.00 65.67 5.93 39.00 81.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 8.11 0.35 -2.33 2.53
## meanFD 0.28 0.18 -2.33 0.08
## viq_all 52.00 0.35 -2.33 16.17
## piq_all 59.00 -0.06 -2.33 17.05
## fsiq4_all 55.00 0.30 -2.33 16.60
## A_pct_severity 0.12 0.38 -2.33 0.04
## B_pct_severity 0.11 -0.02 -2.33 0.03
## ADI_social_total 5.00 -0.13 -2.33 1.45
## ADI_communication_total 5.00 -0.13 -2.33 1.45
## ADI_RRB_total 3.00 -0.21 -2.33 0.88
## ados_2_SA_CSS 5.00 -0.13 -2.33 1.45
## ados_2_RRB_CSS 6.00 -0.21 -2.33 1.76
## SRS_tscore 12.00 -0.38 -2.33 4.00
## SRS_tscore_self 0.00 NA NA NA
## RBS_total 11.00 -0.06 -2.33 3.18
## SSP_total 14.00 0.00 -2.75 7.00
## vabsdscoresc_dss 25.00 0.38 -2.33 8.33
## vabsdscoresd_dss 8.00 0.29 -2.33 2.40
## vabsdscoress_dss 19.00 0.23 -2.33 5.61
## vabsabcabc_standard 42.00 -0.37 -2.33 13.38
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 67 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 67 NaN NA NA NaN NA Inf -Inf
## age 3 67 16.58 5.91 16.07 16.25 6.51 7.12 30.15
## meanFD 4 67 0.26 0.30 0.19 0.20 0.12 0.05 1.60
## viq_all 5 66 102.33 15.72 103.36 102.83 17.61 70.00 133.00
## piq_all 6 66 103.66 18.68 105.98 104.81 18.57 52.00 134.00
## fsiq4_all 7 66 103.29 16.70 105.50 104.06 17.42 64.00 131.00
## A_pct_severity 8 67 0.26 0.12 0.24 0.26 0.12 0.04 0.65
## B_pct_severity 9 67 0.24 0.13 0.23 0.23 0.12 0.00 0.67
## ADI_social_total 10 67 14.70 6.22 15.00 14.91 5.93 1.00 27.00
## ADI_communication_total 11 67 11.58 5.53 11.00 11.51 5.93 0.00 24.00
## ADI_RRB_total 12 67 3.90 2.28 4.00 3.84 1.48 0.00 9.00
## ados_2_SA_CSS 13 65 5.62 2.47 6.00 5.68 2.97 1.00 10.00
## ados_2_RRB_CSS 14 65 4.94 2.54 6.00 4.92 1.48 1.00 10.00
## SRS_tscore 15 60 66.20 11.19 67.00 66.00 11.86 43.00 90.00
## SRS_tscore_self 16 32 61.81 8.16 62.00 61.62 6.67 46.00 84.00
## RBS_total 17 57 13.54 11.27 11.00 12.28 10.38 0.00 52.00
## SSP_total 18 37 140.19 26.49 142.00 142.13 32.62 69.00 177.00
## vabsdscoresc_dss 19 61 82.59 14.53 81.00 81.78 13.34 50.00 122.00
## vabsdscoresd_dss 20 60 79.18 16.08 78.50 78.67 12.60 38.00 119.00
## vabsdscoress_dss 21 61 76.48 16.12 78.00 77.33 13.34 28.00 112.00
## vabsabcabc_standard 22 60 78.53 12.93 77.50 78.02 9.64 48.00 117.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.03 0.42 -0.64 0.72
## meanFD 1.55 3.19 10.73 0.04
## viq_all 63.00 -0.23 -0.85 1.94
## piq_all 82.00 -0.56 -0.03 2.30
## fsiq4_all 67.00 -0.40 -0.56 2.06
## A_pct_severity 0.61 0.43 0.29 0.01
## B_pct_severity 0.67 0.72 0.90 0.02
## ADI_social_total 26.00 -0.25 -0.77 0.76
## ADI_communication_total 24.00 0.10 -0.59 0.68
## ADI_RRB_total 9.00 0.26 -0.52 0.28
## ados_2_SA_CSS 9.00 -0.23 -0.85 0.31
## ados_2_RRB_CSS 9.00 -0.40 -0.78 0.31
## SRS_tscore 47.00 0.12 -0.76 1.44
## SRS_tscore_self 38.00 0.42 0.53 1.44
## RBS_total 52.00 1.23 1.60 1.49
## SSP_total 108.00 -0.62 -0.39 4.36
## vabsdscoresc_dss 72.00 0.50 0.03 1.86
## vabsdscoresd_dss 81.00 0.22 0.17 2.08
## vabsdscoress_dss 84.00 -0.62 0.97 2.06
## vabsabcabc_standard 69.00 0.45 0.40 1.67
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 63 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 63 NaN NA NA NaN NA Inf -Inf
## age 3 63 16.30 5.10 16.07 16.17 5.91 7.48 29.23
## meanFD 4 63 0.22 0.20 0.16 0.19 0.10 0.04 1.31
## viq_all 5 60 95.21 19.99 98.00 96.37 21.50 50.91 130.00
## piq_all 6 62 98.09 21.23 103.50 99.53 19.27 44.03 138.00
## fsiq4_all 7 61 97.35 19.71 103.00 98.23 19.64 59.00 139.00
## A_pct_severity 8 63 0.48 0.14 0.49 0.48 0.16 0.19 0.75
## B_pct_severity 9 63 0.20 0.12 0.20 0.19 0.15 0.00 0.47
## ADI_social_total 10 63 18.67 5.75 19.00 18.92 5.93 6.00 29.00
## ADI_communication_total 11 63 15.11 4.71 16.00 15.29 4.45 4.00 24.00
## ADI_RRB_total 12 63 3.90 2.52 4.00 3.76 2.97 0.00 10.00
## ados_2_SA_CSS 13 60 6.18 2.89 6.00 6.29 4.45 1.00 10.00
## ados_2_RRB_CSS 14 60 4.53 2.76 5.00 4.42 2.97 1.00 9.00
## SRS_tscore 15 55 75.04 11.69 78.00 75.98 13.34 48.00 90.00
## SRS_tscore_self 16 30 62.40 9.92 61.00 62.21 9.64 40.00 84.00
## RBS_total 17 55 19.35 15.40 15.00 17.69 11.86 0.00 73.00
## SSP_total 18 44 138.73 25.65 139.50 138.97 28.17 91.00 184.00
## vabsdscoresc_dss 19 57 72.02 14.52 72.00 72.81 11.86 21.00 102.00
## vabsdscoresd_dss 20 57 69.95 15.18 68.00 69.74 14.83 42.00 118.00
## vabsdscoress_dss 21 57 65.23 14.98 66.00 65.62 14.83 23.00 100.00
## vabsabcabc_standard 22 57 67.11 12.99 68.00 67.70 8.90 28.00 94.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.75 0.27 -0.76 0.64
## meanFD 1.26 3.14 13.22 0.02
## viq_all 79.09 -0.45 -0.67 2.58
## piq_all 93.97 -0.54 -0.57 2.70
## fsiq4_all 80.00 -0.37 -0.81 2.52
## A_pct_severity 0.56 -0.07 -0.96 0.02
## B_pct_severity 0.47 0.32 -0.80 0.01
## ADI_social_total 23.00 -0.38 -0.68 0.72
## ADI_communication_total 20.00 -0.37 -0.80 0.59
## ADI_RRB_total 10.00 0.45 -0.67 0.32
## ados_2_SA_CSS 9.00 -0.17 -1.24 0.37
## ados_2_RRB_CSS 8.00 -0.13 -1.29 0.36
## SRS_tscore 42.00 -0.59 -0.67 1.58
## SRS_tscore_self 44.00 0.11 -0.23 1.81
## RBS_total 73.00 1.13 1.07 2.08
## SSP_total 93.00 -0.13 -0.98 3.87
## vabsdscoresc_dss 81.00 -0.91 2.25 1.92
## vabsdscoresd_dss 76.00 0.44 0.20 2.01
## vabsdscoress_dss 77.00 -0.32 0.14 1.98
## vabsabcabc_standard 66.00 -0.60 1.15 1.72
## ------------------------------------------------------------
## subgrp: TD
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 122 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 122 NaN NA NA NaN NA Inf -Inf
## age 3 122 16.86 6.07 16.34 16.58 7.59 6.89 29.72
## meanFD 4 122 0.23 0.46 0.14 0.15 0.07 0.04 4.60
## viq_all 5 122 104.02 17.58 108.18 105.64 12.13 45.00 140.00
## piq_all 6 122 104.64 18.41 108.96 106.56 14.08 49.00 139.00
## fsiq4_all 7 122 104.94 17.14 108.09 107.29 11.86 50.00 134.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf -Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf -Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf -Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf -Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf -Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf -Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf -Inf
## SRS_tscore 15 65 47.23 9.34 44.00 45.66 4.45 37.00 90.00
## SRS_tscore_self 16 61 48.44 6.84 47.00 47.63 5.93 39.00 69.00
## RBS_total 17 63 3.08 11.54 0.00 0.86 0.00 0.00 89.00
## SSP_total 18 54 174.93 19.38 182.00 178.41 6.67 75.00 190.00
## vabsdscoresc_dss 19 39 92.74 25.45 96.00 95.82 20.76 21.00 125.00
## vabsdscoresd_dss 20 39 91.10 22.65 97.00 93.91 14.83 27.00 122.00
## vabsdscoress_dss 21 39 98.90 27.04 103.00 102.12 17.79 20.00 132.00
## vabsabcabc_standard 22 38 93.00 25.39 100.00 96.44 15.57 20.00 126.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 22.83 0.33 -0.97 0.55
## meanFD 4.56 7.54 64.83 0.04
## viq_all 95.00 -0.99 1.51 1.59
## piq_all 90.00 -0.93 0.67 1.67
## fsiq4_all 84.00 -1.28 1.67 1.55
## A_pct_severity -Inf NA NA NA
## B_pct_severity -Inf NA NA NA
## ADI_social_total -Inf NA NA NA
## ADI_communication_total -Inf NA NA NA
## ADI_RRB_total -Inf NA NA NA
## ados_2_SA_CSS -Inf NA NA NA
## ados_2_RRB_CSS -Inf NA NA NA
## SRS_tscore 53.00 2.14 5.82 1.16
## SRS_tscore_self 30.00 1.05 0.48 0.88
## RBS_total 89.00 6.60 45.89 1.45
## SSP_total 115.00 -2.88 10.92 2.64
## vabsdscoresc_dss 104.00 -1.21 1.00 4.08
## vabsdscoresd_dss 95.00 -1.34 1.26 3.63
## vabsdscoress_dss 112.00 -1.26 1.10 4.33
## vabsabcabc_standard 106.00 -1.42 1.40 4.12
#------------------------------------------------------------------------------
# Cross-tabulate subtype (rows) against sex (columns)
# Discovery: restrict data2write to rows whose dataset is "Discovery"
data2use <- subset(data2write, data2write$dataset == "Discovery")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 4 4
## SC_equal_RRB 16 49
## SC_over_RRB 15 45
## TD 41 80
# Pearson chi-squared test of independence between subtype and sex
cs_res <- chisq.test(data2use$subgrp, data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.0102, df = 3, p-value = 0.2604
# Replication: same cross-tabulation on the rows whose dataset is "Replication"
data2use <- subset(data2write, data2write$dataset == "Replication")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 1 2
## SC_equal_RRB 17 50
## SC_over_RRB 18 45
## TD 47 75
# Pearson chi-squared test of independence between subtype and sex
cs_res <- chisq.test(data2use$subgrp, data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.0105, df = 3, p-value = 0.2603
#------------------------------------------------------------------------------
# Group-difference statistics for every variable in vars2analyze.
#
# For each variable, linear mixed-effect models (lme) are fitted with subgrp as
# the fixed effect and a random intercept per Centre, separately for the
# Discovery and Replication rows of df2use:
#   * All_*                       : F-test of subgrp using all rows of the set.
#   * SCequalRRB_vs_SCoverRRB_*   : same model after removing the "TD" and
#                                   "RRB_over_SC" rows, plus the t-statistic of
#                                   the second fixed-effect coefficient and an
#                                   effect size from cohens_d().
#   * SCequalRRB_vs_SCoverRRB.repBF : replication Bayes factor from BFSALL().
# A jitter + box plot of each variable is printed as a side effect.
vars2analyze <- c("age","meanFD","viq_all","piq_all","fsiq4_all",
                  "A_pct_severity","B_pct_severity",
                  "ADI_social_total","ADI_communication_total","ADI_RRB_total",
                  "ados_2_SA_CSS","ados_2_RRB_CSS",
                  "SRS_tscore_self","RBS_total","SSP_total",
                  "vabsdscoress_dss","vabsdscoresd_dss","vabsdscoresc_dss","vabsabcabc_standard")
# y-axis labels, matched to vars2analyze by position.
# NOTE(review): "ADI-R SC" / "ADI-R RRB" are paired with A_pct_severity /
# B_pct_severity (and "ADI-R RRB" appears again for ADI_RRB_total), and "SRS"
# is paired with SRS_tscore_self -- confirm these labels are intended.
vnames <- c("Age","Mean FD","VIQ","PIQ","FIQ","ADI-R SC","ADI-R RRB","ADI-R Social","ADI-R Communication","ADI-R RRB","ADOS SA CSS","ADOS RRB CSS","SRS","RBS","SSP","Vineland Socialization","Vineland Daily Living Skills","Vineland Communication","Vineland ABC")
cnames <- c("All_Disc.fstat","All_Disc.pval","All_Rep.fstat","All_Rep.pval",
            "SCequalRRB_vs_SCoverRRB_Disc.fstat","SCequalRRB_vs_SCoverRRB_Disc.tstat",
            "SCequalRRB_vs_SCoverRRB_Disc.pval","SCequalRRB_vs_SCoverRRB_Disc.es",
            "SCequalRRB_vs_SCoverRRB_Rep.fstat","SCequalRRB_vs_SCoverRRB_Rep.tstat",
            "SCequalRRB_vs_SCoverRRB_Rep.pval","SCequalRRB_vs_SCoverRRB_Rep.es",
            "SCequalRRB_vs_SCoverRRB.repBF")

# Result table: one row per variable, one column per statistic (all NA to start)
output_res <- data.frame(matrix(nrow = length(vars2analyze), ncol = length(cnames)))
colnames(output_res) <- cnames
rownames(output_res) <- vars2analyze
output_res$varNames <- vars2analyze

# Loop invariants: random-effects formula, plot colours and plotting data
rx_form <- as.formula(sprintf("~ 1|%s", "Centre"))
colors2use <- get_ggColorHue(3)
df4plot <- subset(df2use, df2use$subgrp != "RRB_over_SC")

for (ivar in seq_along(vars2analyze)) {
  y_var <- vars2analyze[ivar]
  # print(y_var)

  # The fixed-effects formula is identical for all four models of this variable
  fx_form <- as.formula(sprintf("%s ~ %s", y_var, "subgrp"))

  #----------------------------------------------------------------------------
  # Discovery, all rows: F-test of subgrp
  df4mod <- subset(df2use, df2use$dataset == "Discovery")
  # mixed-effect model: Centre as random intercept, subgrp as fixed effect
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "All_Disc.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "All_Disc.pval"] <- res["subgrp", "p-value"]

  #----------------------------------------------------------------------------
  # Replication, all rows: F-test of subgrp
  df4mod <- subset(df2use, df2use$dataset == "Replication")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "All_Rep.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "All_Rep.pval"] <- res["subgrp", "p-value"]

  #----------------------------------------------------------------------------
  # Discovery, SC_equal_RRB vs SC_over_RRB only
  df4mod <- subset(df2use, df2use$dataset == "Discovery" & !is.element(df2use$subgrp, c("TD", "RRB_over_SC")))
  # Group sizes handed to BFSALL() below.
  # NOTE(review): these count every row of the subgroup, including rows where
  # y_var is missing and which na.omit removes from the model -- confirm that
  # these are the sample sizes BFSALL() should receive.
  n1 <- sum(df4mod$subgrp == "SC_equal_RRB")
  m1 <- sum(df4mod$subgrp == "SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.pval"] <- res["subgrp", "p-value"]
  res <- summary(mod2use)
  # Row 2 of tTable is the first non-intercept coefficient (presumably
  # SC_over_RRB relative to SC_equal_RRB -- depends on the factor level order)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.tstat"] <- res$tTable[2, "t-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.es"] <- cohens_d(df4mod[df4mod$subgrp == "SC_equal_RRB", y_var],
                                                                  df4mod[df4mod$subgrp == "SC_over_RRB", y_var])

  #----------------------------------------------------------------------------
  # Replication, SC_equal_RRB vs SC_over_RRB only
  df4mod <- subset(df2use, df2use$dataset == "Replication" & !is.element(df2use$subgrp, c("TD", "RRB_over_SC")))
  n2 <- sum(df4mod$subgrp == "SC_equal_RRB")
  m2 <- sum(df4mod$subgrp == "SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.pval"] <- res["subgrp", "p-value"]
  res <- summary(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.tstat"] <- res$tTable[2, "t-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.es"] <- cohens_d(df4mod[df4mod$subgrp == "SC_equal_RRB", y_var],
                                                                 df4mod[df4mod$subgrp == "SC_over_RRB", y_var])

  #----------------------------------------------------------------------------
  # Replication Bayes Factor
  # tobs is the Discovery t-statistic and trep the Replication t-statistic.
  # FIX: trep previously received the Discovery t-statistic as well, so the
  # Bayes factor never used the replication result. Any repBF values rendered
  # before this fix must be regenerated.
  res_bf <- BFSALL(tobs = output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.tstat"],
                   trep = output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.tstat"],
                   n1 = n1,
                   n2 = n2,
                   m1 = m1,
                   m2 = m2,
                   sample = 2,
                   Type = "ALL")
  output_res[y_var, "SCequalRRB_vs_SCoverRRB.repBF"] <- res_bf["Replication BF", "Replication 1"]

  # make a plot: jittered points plus box plot per subgroup, one facet per dataset
  p <- ggplot(data = df4plot, aes_string(x = "subgrp", y = y_var, colour = "subgrp")) + facet_grid(. ~ dataset)
  p <- p + geom_jitter() + geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA)
  p <- p + ylab(vnames[ivar]) + xlab("Group") +
    scale_x_discrete(labels = c("SC_equal_RRB" = "SC=RRB", "SC_over_RRB" = "SC>RRB", "TD" = "TD")) +
    # guides(colour = FALSE) is deprecated in ggplot2; "none" hides the legend
    scale_colour_manual(values = c(colors2use[2:3], "grey60")) + guides(colour = "none") +
    theme(text = element_text(size = fontSize - 5),
          axis.text.x = element_text(size = fontSize - 5),
          axis.text.y = element_text(size = fontSize - 5))
  print(p)
}

# Store the Vineland ABC and Daily Living Skills effect sizes in the "0.8" row
# of the vabc / vabc_dls tables
vabc["0.8", "Discovery"] <- output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc["0.8", "Replication"] <- output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Rep.es"]
vabc_dls["0.8", "Discovery"] <- output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc_dls["0.8", "Replication"] <- output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Rep.es"]
output_res
## All_Disc.fstat All_Disc.pval All_Rep.fstat All_Rep.pval
## age 0.3468322 7.914756e-01 0.5287789 6.629189e-01
## meanFD 2.4076706 6.776486e-02 0.1930085 9.011052e-01
## viq_all 2.5251084 5.821669e-02 2.4621363 6.317097e-02
## piq_all 1.5758519 1.958225e-01 1.0482434 3.718537e-01
## fsiq4_all 2.4129618 6.732296e-02 1.5741816 1.962065e-01
## A_pct_severity 26.7618096 2.002742e-10 59.8316827 0.000000e+00
## B_pct_severity 30.5403989 1.481371e-11 3.2122715 4.355799e-02
## ADI_social_total 1.1411638 3.227004e-01 9.6503824 1.254231e-04
## ADI_communication_total 1.5717852 2.116868e-01 12.4266051 1.177914e-05
## ADI_RRB_total 27.4062031 1.274802e-10 0.5610312 5.720280e-01
## ados_2_SA_CSS 3.7940808 2.515948e-02 1.3359348 2.667306e-01
## ados_2_RRB_CSS 1.2774997 2.823758e-01 0.5571873 5.742687e-01
## SRS_tscore_self 38.5195895 0.000000e+00 31.9168435 3.996803e-15
## RBS_total 18.7453428 1.495031e-10 15.3366627 7.082595e-09
## SSP_total 30.5672415 5.107026e-15 22.8983017 5.992984e-12
## vabsdscoress_dss 23.4592386 1.657785e-12 25.1597168 2.745582e-13
## vabsdscoresd_dss 11.8354963 5.357631e-07 10.9006901 1.584419e-06
## vabsdscoresc_dss 9.7808359 6.217169e-06 9.8065124 5.902403e-06
## vabsabcabc_standard 18.3979940 3.298979e-10 18.1084676 4.323943e-10
## SCequalRRB_vs_SCoverRRB_Disc.fstat
## age 0.26192656
## meanFD 0.97946508
## viq_all 0.30231080
## piq_all 0.01015093
## fsiq4_all 0.12578085
## A_pct_severity 34.73241475
## B_pct_severity 39.14477824
## ADI_social_total 1.47054314
## ADI_communication_total 2.92929727
## ADI_RRB_total 30.84312183
## ados_2_SA_CSS 0.35270821
## ados_2_RRB_CSS 0.78531699
## SRS_tscore_self 2.80844616
## RBS_total 0.25978094
## SSP_total 0.48083177
## vabsdscoress_dss 3.28380267
## vabsdscoresd_dss 1.01228803
## vabsdscoresc_dss 0.43999328
## vabsabcabc_standard 2.99819941
## SCequalRRB_vs_SCoverRRB_Disc.tstat
## age 0.5117876
## meanFD -0.9896793
## viq_all 0.5498280
## piq_all 0.1007518
## fsiq4_all 0.3546560
## A_pct_severity 5.8934213
## B_pct_severity -6.2565788
## ADI_social_total 1.2126595
## ADI_communication_total 1.7115190
## ADI_RRB_total -5.5536584
## ados_2_SA_CSS -0.5938924
## ados_2_RRB_CSS -0.8861811
## SRS_tscore_self 1.6758419
## RBS_total -0.5096871
## SSP_total 0.6934203
## vabsdscoress_dss -1.8121266
## vabsdscoresd_dss -1.0061253
## vabsdscoresc_dss -0.6633199
## vabsabcabc_standard -1.7315309
## SCequalRRB_vs_SCoverRRB_Disc.pval
## age 6.097402e-01
## meanFD 3.243223e-01
## viq_all 5.834766e-01
## piq_all 9.199185e-01
## fsiq4_all 7.234699e-01
## A_pct_severity 3.550766e-08
## B_pct_severity 6.295433e-09
## ADI_social_total 2.276420e-01
## ADI_communication_total 8.956880e-02
## ADI_RRB_total 1.708319e-07
## ados_2_SA_CSS 5.537306e-01
## ados_2_RRB_CSS 3.773378e-01
## SRS_tscore_self 9.989021e-02
## RBS_total 6.113826e-01
## SSP_total 4.903081e-01
## vabsdscoress_dss 7.262213e-02
## vabsdscoresd_dss 3.165241e-01
## vabsdscoresc_dss 5.084766e-01
## vabsabcabc_standard 8.611059e-02
## SCequalRRB_vs_SCoverRRB_Disc.es
## age -0.08989673
## meanFD 0.17718101
## viq_all -0.11356733
## piq_all -0.01252847
## fsiq4_all -0.06349360
## A_pct_severity -0.89512431
## B_pct_severity 1.09777877
## ADI_social_total -0.12891662
## ADI_communication_total -0.16745540
## ADI_RRB_total 1.00708452
## ados_2_SA_CSS 0.11005817
## ados_2_RRB_CSS 0.17618905
## SRS_tscore_self -0.25296265
## RBS_total 0.14661093
## SSP_total -0.13203186
## vabsdscoress_dss 0.26526047
## vabsdscoresd_dss 0.12028055
## vabsdscoresc_dss 0.11808163
## vabsabcabc_standard 0.25391669
## SCequalRRB_vs_SCoverRRB_Rep.fstat
## age 0.08151188
## meanFD 0.82655445
## viq_all 3.72814980
## piq_all 1.08570322
## fsiq4_all 1.85608227
## A_pct_severity 113.29691674
## B_pct_severity 1.51006239
## ADI_social_total 18.96724209
## ADI_communication_total 21.24526531
## ADI_RRB_total 0.20305007
## ados_2_SA_CSS 1.66094873
## ados_2_RRB_CSS 1.08347382
## SRS_tscore_self 0.06525365
## RBS_total 8.02057567
## SSP_total 0.17011474
## vabsdscoress_dss 18.72611076
## vabsdscoresd_dss 12.09828145
## vabsdscoresc_dss 15.60770992
## vabsabcabc_standard 27.59806183
## SCequalRRB_vs_SCoverRRB_Rep.tstat
## age -0.2855028
## meanFD -0.9091504
## viq_all -1.9308417
## piq_all -1.0419708
## fsiq4_all -1.3623811
## A_pct_severity 10.6441024
## B_pct_severity -1.2288460
## ADI_social_total 4.3551397
## ADI_communication_total 4.6092587
## ADI_RRB_total 0.4506108
## ados_2_SA_CSS 1.2887780
## ados_2_RRB_CSS -1.0409005
## SRS_tscore_self 0.2554479
## RBS_total 2.8320621
## SSP_total -0.4124497
## vabsdscoress_dss -4.3273676
## vabsdscoresd_dss -3.4782584
## vabsdscoresc_dss -3.9506594
## vabsabcabc_standard -5.2533857
## SCequalRRB_vs_SCoverRRB_Rep.pval
## age 7.757315e-01
## meanFD 3.650204e-01
## viq_all 5.584194e-02
## piq_all 2.994689e-01
## fsiq4_all 1.755875e-01
## A_pct_severity 0.000000e+00
## B_pct_severity 2.214380e-01
## ADI_social_total 2.741201e-05
## ADI_communication_total 9.830284e-06
## ADI_RRB_total 6.530507e-01
## ados_2_SA_CSS 1.999543e-01
## ados_2_RRB_CSS 3.000144e-01
## SRS_tscore_self 7.992967e-01
## RBS_total 5.527113e-03
## SSP_total 6.811710e-01
## vabsdscoress_dss 3.274276e-05
## vabsdscoresd_dss 7.197026e-04
## vabsdscoresc_dss 1.360567e-04
## vabsabcabc_standard 7.194943e-07
## SCequalRRB_vs_SCoverRRB_Rep.es
## age 0.050104241
## meanFD 0.159551075
## viq_all 0.397233907
## piq_all 0.278814494
## fsiq4_all 0.325765875
## A_pct_severity -1.660514857
## B_pct_severity 0.296042356
## ADI_social_total -0.661290880
## ADI_communication_total -0.684879764
## ADI_RRB_total -0.003853938
## ados_2_SA_CSS -0.211654450
## ados_2_RRB_CSS 0.152912314
## SRS_tscore_self -0.064904510
## RBS_total -0.431994003
## SSP_total 0.056040468
## vabsdscoress_dss 0.721996648
## vabsdscoresd_dss 0.590093340
## vabsdscoresc_dss 0.727794865
## vabsabcabc_standard 0.882109733
## SCequalRRB_vs_SCoverRRB.repBF varNames
## age 7.988837e-01 age
## meanFD 1.141523e+00 meanFD
## viq_all 8.138378e-01 viq_all
## piq_all 7.037967e-01 piq_all
## fsiq4_all 7.457033e-01 fsiq4_all
## A_pct_severity 3.481048e+06 A_pct_severity
## B_pct_severity 1.931946e+07 B_pct_severity
## ADI_social_total 1.458599e+00 ADI_social_total
## ADI_communication_total 2.997668e+00 ADI_communication_total
## ADI_RRB_total 7.330776e+05 ADI_RRB_total
## ados_2_SA_CSS 8.360759e-01 ados_2_SA_CSS
## ados_2_RRB_CSS 1.035729e+00 ados_2_RRB_CSS
## SRS_tscore_self 2.826856e+00 SRS_tscore_self
## RBS_total 7.973627e-01 RBS_total
## SSP_total 8.914452e-01 SSP_total
## vabsdscoress_dss 3.562055e+00 vabsdscoress_dss
## vabsdscoresd_dss 1.161038e+00 vabsdscoresd_dss
## vabsdscoresc_dss 8.714754e-01 vabsdscoresc_dss
## vabsabcabc_standard 3.095480e+00 vabsabcabc_standard
#------------------------------------------------------------------------------
# Z-score cut-off applied to the difference score when assigning subtypes
z_thresh <- 0.9
# vars2use = c("dbaes_atotal","dbaes_btotal")

# Discovery: difference score (vars2use[1] minus vars2use[2]) and its mean / SD,
# which are then used to z-score the Discovery set itself
ds_disc <- Dverbal_Discovery[[vars2use[1]]] - Dverbal_Discovery[[vars2use[2]]]
mean2use <- mean(ds_disc)
sd2use <- sd(ds_disc)
Dverbal_Discovery <- make_subtype(
  data2use = Dverbal_Discovery,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)

# Replication: same procedure, using the Replication set's own mean / SD
ds_rep <- Dverbal_Replication[[vars2use[1]]] - Dverbal_Replication[[vars2use[2]]]
mean2use <- mean(ds_rep)
sd2use <- sd(ds_rep)
Dverbal_Replication <- make_subtype(
  data2use = Dverbal_Replication,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#------------------------------------------------------------------------------
# Cross-tabulate z-score subtype (rows) against sex (columns)
# Discovery
table(Dverbal_Discovery$z_ds_group, Dverbal_Discovery$sex)
##
## F M
## RRB_over_SC 29 130
## SC_equal_RRB 134 442
## SC_over_RRB 34 120
# Pearson chi-squared test of independence between z-score subtype and sex
cs_res <- chisq.test(Dverbal_Discovery$z_ds_group, Dverbal_Discovery$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Discovery$z_ds_group and Dverbal_Discovery$sex
## X-squared = 1.8247, df = 2, p-value = 0.4016
# Replication: z-score subtype (rows) against sex (columns)
table(Dverbal_Replication$z_ds_group, Dverbal_Replication$sex)
##
## F M
## RRB_over_SC 26 126
## SC_equal_RRB 137 460
## SC_over_RRB 33 108
# Pearson chi-squared test of independence between z-score subtype and sex
cs_res <- chisq.test(Dverbal_Replication$z_ds_group, Dverbal_Replication$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Replication$z_ds_group and Dverbal_Replication$sex
## X-squared = 2.5948, df = 2, p-value = 0.2732
#------------------------------------------------------------------------------
# Per-subtype descriptive statistics
# Discovery: keep the identifier, demographic, ADOS, IQ and SC/RRB columns
df2use <- Dverbal_Discovery[, c("subjectkey", "dataset_id", "collection_id",
                                "interview_age", "sex", "z_ds_group",
                                "ados_age", "ados_sa_css", "ados_rrb_css",
                                "iq", "dbaes_atotal", "dbaes_btotal")]
# interview_age divided by 12 (presumably months to years -- confirm units)
df2use$age <- df2use$interview_age / 12
cols2use <- c("z_ds_group", "sex", "age", "ados_age", "ados_sa_css",
              "ados_rrb_css", "iq", "dbaes_atotal", "dbaes_btotal")
# Summaries of the selected columns, split by the z_ds_group column
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 159 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 159 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 159 9.95 3.96 9.50 9.70 3.46 2.00 22.58 20.58
## ados_age 4 22 91.14 38.67 85.00 88.61 37.06 37.00 171.00 134.00
## ados_sa_css 5 22 6.18 2.48 6.50 6.22 3.71 2.00 10.00 8.00
## ados_rrb_css 6 22 7.45 2.22 8.00 7.72 1.48 1.00 10.00 9.00
## iq 7 43 103.00 14.43 102.00 103.46 16.31 67.00 139.00 72.00
## dbaes_atotal 8 159 0.21 0.11 0.21 0.21 0.12 0.01 0.45 0.44
## dbaes_btotal 9 159 0.49 0.12 0.48 0.48 0.14 0.25 0.79 0.54
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 0.63 0.23 0.31
## ados_age 0.53 -0.80 8.24
## ados_sa_css -0.09 -1.26 0.53
## ados_rrb_css -1.13 0.96 0.47
## iq -0.21 0.11 2.20
## dbaes_atotal 0.14 -0.81 0.01
## dbaes_btotal 0.22 -0.68 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 576 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 576 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 576 9.07 5.41 8.08 8.32 4.69 0 45.75 45.75 1.97
## ados_age 4 93 82.66 44.56 73.00 77.91 50.41 27 202.00 175.00 0.73
## ados_sa_css 5 93 6.91 2.03 7.00 7.01 1.48 1 10.00 9.00 -0.40
## ados_rrb_css 6 93 7.73 2.23 8.00 8.11 1.48 1 10.00 9.00 -1.64
## iq 7 136 104.38 18.44 107.00 105.80 17.05 42 138.00 96.00 -0.85
## dbaes_atotal 8 576 0.30 0.14 0.30 0.30 0.14 0 0.70 0.70 0.02
## dbaes_btotal 9 576 0.32 0.14 0.32 0.32 0.14 0 0.68 0.68 -0.07
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 6.72 0.23
## ados_age -0.61 4.62
## ados_sa_css -0.17 0.21
## ados_rrb_css 2.69 0.23
## iq 1.03 1.58
## dbaes_atotal -0.31 0.01
## dbaes_btotal -0.30 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 154 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 154 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 154 7.28 5.53 5.58 6.20 2.84 1.67 37.33 35.67
## ados_age 4 38 66.37 33.78 63.00 60.75 25.20 30.00 172.00 142.00
## ados_sa_css 5 38 7.45 1.54 7.00 7.47 1.48 4.00 10.00 6.00
## ados_rrb_css 6 38 8.16 1.79 8.00 8.38 1.48 1.00 10.00 9.00
## iq 7 20 102.80 22.13 106.50 104.19 20.02 40.00 140.00 100.00
## dbaes_atotal 8 154 0.50 0.13 0.49 0.50 0.13 0.20 0.87 0.67
## dbaes_btotal 9 154 0.22 0.10 0.22 0.21 0.09 0.00 0.47 0.47
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.47 7.39 0.45
## ados_age 1.68 2.63 5.48
## ados_sa_css -0.10 -0.73 0.25
## ados_rrb_css -1.63 4.33 0.29
## iq -0.84 0.96 4.95
## dbaes_atotal 0.31 -0.25 0.01
## dbaes_btotal 0.17 -0.16 0.01
# Replication: same column selection and per-subtype summaries
df2use <- Dverbal_Replication[, c("subjectkey", "dataset_id", "collection_id",
                                  "interview_age", "sex", "z_ds_group",
                                  "ados_age", "ados_sa_css", "ados_rrb_css",
                                  "iq", "dbaes_atotal", "dbaes_btotal")]
# interview_age divided by 12 (presumably months to years -- confirm units)
df2use$age <- df2use$interview_age / 12
cols2use <- c("z_ds_group", "sex", "age", "ados_age", "ados_sa_css",
              "ados_rrb_css", "iq", "dbaes_atotal", "dbaes_btotal")
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 152 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 152 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 152 9.92 4.57 9.54 9.49 4.14 3.17 28.58 25.42
## ados_age 4 11 107.00 50.26 110.00 105.78 51.89 36.00 189.00 153.00
## ados_sa_css 5 11 6.73 2.05 7.00 6.89 1.48 3.00 9.00 6.00
## ados_rrb_css 6 11 6.55 1.04 7.00 6.56 0.00 5.00 8.00 3.00
## iq 7 45 104.87 17.11 104.00 104.38 16.31 58.00 152.00 94.00
## dbaes_atotal 8 152 0.21 0.10 0.21 0.21 0.09 0.01 0.61 0.60
## dbaes_btotal 9 152 0.51 0.12 0.49 0.50 0.11 0.24 0.93 0.69
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 1.17 2.32 0.37
## ados_age 0.07 -1.44 15.15
## ados_sa_css -0.80 -0.73 0.62
## ados_rrb_css -0.60 -1.27 0.31
## iq 0.16 0.97 2.55
## dbaes_atotal 0.82 1.43 0.01
## dbaes_btotal 0.64 0.82 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 597 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 597 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 597 8.98 5.45 8.00 8.17 4.57 0 40.92 40.92 1.79
## ados_age 4 113 78.96 37.85 71.00 74.65 40.03 35 196.00 161.00 0.88
## ados_sa_css 5 113 6.90 2.08 7.00 6.95 2.97 2 10.00 8.00 -0.03
## ados_rrb_css 6 113 7.42 2.33 8.00 7.75 1.48 1 10.00 9.00 -1.34
## iq 7 124 106.51 17.41 108.00 106.73 16.31 57 146.00 89.00 -0.22
## dbaes_atotal 8 597 0.31 0.14 0.31 0.31 0.14 0 0.78 0.78 0.03
## dbaes_btotal 9 597 0.33 0.14 0.33 0.33 0.14 0 0.81 0.81 0.08
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 4.63 0.22
## ados_age 0.14 3.56
## ados_sa_css -0.89 0.20
## ados_rrb_css 1.46 0.22
## iq 0.19 1.56
## dbaes_atotal -0.16 0.01
## dbaes_btotal 0.09 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 141 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 141 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 141 7.43 5.60 5.83 6.30 3.09 2.08 30.92 28.83
## ados_age 4 29 67.86 26.37 60.00 65.16 20.76 30.00 141.00 111.00
## ados_sa_css 5 29 7.07 1.69 7.00 7.08 1.48 3.00 10.00 7.00
## ados_rrb_css 6 29 7.48 2.46 8.00 7.80 2.97 1.00 10.00 9.00
## iq 7 17 110.35 19.14 118.00 112.00 14.83 62.00 134.00 72.00
## dbaes_atotal 8 141 0.51 0.13 0.51 0.51 0.14 0.25 0.96 0.71
## dbaes_btotal 9 141 0.21 0.11 0.21 0.21 0.12 0.00 0.50 0.50
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.15 4.85 0.47
## ados_age 1.08 0.74 4.90
## ados_sa_css -0.10 -0.62 0.31
## ados_rrb_css -1.13 0.77 0.46
## iq -0.96 0.11 4.64
## dbaes_atotal 0.30 0.15 0.01
## dbaes_btotal 0.18 -0.52 0.01
#------------------------------------------------------------------------------
# Does interview age differ between the subtypes?
# Discovery: linear model with subtype as the only predictor, then its ANOVA table
mod2use <- lm(interview_age ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 85832 42916 11.001 1.908e-05 ***
## Residuals 886 3456234 3901
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: interview age by subtype
mod2use <- lm(interview_age ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 66863 33431 8.1544 0.0003096 ***
## Residuals 887 3636501 4100
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Does SC (dbaes_atotal) differ between the subtypes?
# Discovery
mod2use <- lm(dbaes_atotal ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.2145 3.6072 215.13 < 2.2e-16 ***
## Residuals 886 14.8563 0.0168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: SC (dbaes_atotal) by subtype
mod2use <- lm(dbaes_atotal ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.8767 3.4383 195.02 < 2.2e-16 ***
## Residuals 887 15.6387 0.0176
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Does RRB (dbaes_btotal) differ between the subtypes?
# Discovery
mod2use <- lm(dbaes_btotal ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.0621 3.0310 188.29 < 2.2e-16 ***
## Residuals 886 14.2628 0.0161
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: RRB (dbaes_btotal) by subtype
mod2use <- lm(dbaes_btotal ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.7172 3.3586 190.83 < 2.2e-16 ***
## Residuals 887 15.6114 0.0176
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Does ADOS SA CSS (ados_sa_css) differ between the subtypes?
# Discovery
mod2use <- lm(ados_sa_css ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 22.49 11.2456 2.8304 0.06215 .
## Residuals 150 595.98 3.9732
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: ADOS SA CSS by subtype
mod2use <- lm(ados_sa_css ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1.09 0.5429 0.1344 0.8744
## Residuals 150 605.97 4.0398
#------------------------------------------------------------------------------
# Does ADOS RRB CSS (ados_rrb_css) differ between the subtypes?
# Discovery
mod2use <- lm(ados_rrb_css ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.93 3.9661 0.8739 0.4194
## Residuals 150 680.79 4.5386
# Replication: ADOS RRB CSS by subtype
mod2use <- lm(ados_rrb_css ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 8.08 4.0416 0.768 0.4658
## Residuals 150 789.42 5.2628
#------------------------------------------------------------------------------
# Tests of differences in IQ across the subtypes
# Discovery
# One-way ANOVA (NDAR Discovery): IQ by z-score subtype
mod2use <- lm(iq ~ z_ds_group, data = Dverbal_Discovery)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 90 44.82 0.1374 0.8717
## Residuals 196 63939 326.22
# Replication
# One-way ANOVA (NDAR Replication): IQ by z-score subtype
mod2use <- lm(iq ~ z_ds_group, data = Dverbal_Replication)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 372 186.08 0.6077 0.5457
## Residuals 183 56034 306.20
#------------------------------------------------------------------------------
# Make scatterplots with difference score Z subtypes in different colors
# NOTE(review): maxScores is not read by the plotting code below; it is kept
# because code elsewhere in the file may rely on it.
maxScores <- c(3, 4)

# NDAR Discovery: SC (dbaes_atotal) against RRB (dbaes_btotal), one point per
# row, coloured by difference-score subtype.
p_disc <- ggplot(
  data = Dverbal_Discovery,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)
) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR Discovery")

# Legend-free copy that is written to disk. guides(colour = FALSE) is
# deprecated in ggplot2 (>= 3.3.4); "none" is the supported way to drop a legend.
p1_top_left <- p_disc + guides(colour = "none")
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Disc_scatterplot_z%s.pdf", as.character(z_thresh))),
  plot = p1_top_left
)

# Display the version with the legend and the subtype counts.
p_disc
table(Dverbal_Discovery$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 159 576 154
# Correlation test between SC and RRB scores in NDAR Discovery (cor.test defaults)
cor_res <- cor.test(
  x = Dverbal_Discovery$dbaes_atotal,
  y = Dverbal_Discovery$dbaes_btotal
)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Discovery$dbaes_atotal and Dverbal_Discovery$dbaes_btotal
## t = 6.6829, df = 887, p-value = 4.134e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1554332 0.2806578
## sample estimates:
## cor
## 0.2189469
# NDAR Replication: SC (dbaes_atotal) against RRB (dbaes_btotal), one point per
# row, coloured by difference-score subtype.
p_rep <- ggplot(
  data = Dverbal_Replication,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)
) +
  geom_point() +
  xlab("SC") +
  ylab("RRB") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR Replication")

# Legend-free copy that is written to disk. guides(colour = FALSE) is
# deprecated in ggplot2 (>= 3.3.4); "none" is the supported way to drop a legend.
p2_bottom_left <- p_rep + guides(colour = "none")
ggsave(
  filename = file.path(plotpath, sprintf("final_NDAR_Rep_scatterplot_z%s.pdf", as.character(z_thresh))),
  plot = p2_bottom_left
)

# Display the version with the legend and the subtype counts.
p_rep
table(Dverbal_Replication$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 152 597 141
# Correlation test between SC and RRB scores in NDAR Replication (cor.test defaults)
cor_res <- cor.test(
  x = Dverbal_Replication$dbaes_atotal,
  y = Dverbal_Replication$dbaes_btotal
)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Replication$dbaes_atotal and Dverbal_Replication$dbaes_btotal
## t = 7.1459, df = 888, p-value = 1.864e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1700824 0.2943934
## sample estimates:
## cor
## 0.2331903
#------------------------------------------------------------------------------
# Run supervised model with Discovery as training and Replication as Test
# run validation
# make subtypes using z-scores computed from the mean and sd of the training set
# Discovery acts as the training set, Replication as the test set.
train_data <- Dverbal_Discovery
test_data <- Dverbal_Replication

# Norms of the SC - RRB difference score, estimated on the training set only.
train_diff <- train_data[, vars2use[1]] - train_data[, vars2use[2]]
train_mean <- mean(train_diff)
train_sd <- sd(train_diff)

# Training-set subtypes, z-scored against the training norms.
tmp_train <- make_subtype(data2use = train_data,
                          z_thresh = z_thresh,
                          mean2use = train_mean,
                          sd2use = train_sd)

# Reference ("actual") test labels: the test set z-scored against its own
# mean and SD.
test_diff <- test_data[, vars2use[1]] - test_data[, vars2use[2]]
tmp_test <- make_subtype(data2use = test_data,
                         z_thresh = z_thresh,
                         mean2use = mean(test_diff),
                         sd2use = sd(test_diff))

#==============================================================================
#==============================================================================
# Predicted test labels: the same test rows z-scored with the training norms.
pred_labels <- make_subtype(data2use = test_data,
                            z_thresh = z_thresh,
                            mean2use = train_mean,
                            sd2use = train_sd)

# mean2use / sd2use end up holding the training norms, as they did before.
mean2use <- train_mean
sd2use <- train_sd

# Cross-tabulate reference (rows) against predicted (columns) labels.
# make_subtype() builds its factor from the labels that happen to occur, so a
# subtype missing from one labelling would make the table non-square and break
# (or misalign) the diagonal. Fixing the level set keeps it 3 x 3 and ordered.
subtype_levels <- c("RRB_over_SC", "SC_equal_RRB", "SC_over_RRB")
confmat <- table(factor(tmp_test$z_ds_group, levels = subtype_levels),
                 factor(pred_labels$z_ds_group, levels = subtype_levels))

# Accuracy = share of test rows whose two labels agree.
acc <- sum(diag(confmat)) / nrow(pred_labels)
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#==============================================================================
#==============================================================================
# plot confusion matrix
# Confusion-matrix heatmap. Rows of confmat come from the first table() margin
# (tmp_test labels, the reference), columns from the second (pred_labels).
# Cells are coloured by their percentage of the row total and annotated with
# the raw counts.
#
# While the hook is installed, each grid.newpage() pushes a viewport covering
# the top-right 90% of the page, leaving a margin at the left and bottom for
# the axis titles added afterwards.
setHook("grid.newpage", function() pushViewport(viewport(x = 1, y = 1, width = 0.9, height = 0.9, name = "vp", just = c("right", "top"))), action = "prepend")
heat_colors <- colorRampPalette(c("white", "red"))(100)
subtype_labels <- c("RRB>SC", "SC=RRB", "SC>RRB")
pheatmap(confmat / rowSums(confmat) * 100,
         display_numbers = confmat,
         color = heat_colors,
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         fontsize_number = fontSize,
         fontsize_row = fontSize,
         fontsize_col = fontSize,
         labels_row = subtype_labels,
         labels_col = subtype_labels,
         angle_col = 90,
         # pheatmap expects one more break than colours; 100 breaks left the
         # last colour unused. length.out is spelled out instead of relying on
         # partial matching of `length`.
         breaks = seq(0, 100, length.out = length(heat_colors) + 1))
setHook("grid.newpage", NULL, "replace")

# Axis titles. The title at y = -0.07 sits under the columns (pred_labels) and
# the rotated title at x = -0.07 sits beside the rows (tmp_test labels); the
# two texts were previously swapped.
grid::grid.text("Predicted Labels", y = -0.07, gp = gpar(fontsize = fontSize))
grid::grid.text("Actual Labels", x = -0.07, rot = 90, gp = gpar(fontsize = fontSize))

# show accuracy
true_accuracy <- acc
true_accuracy
## [1] 0.9820225
#================================================================================
#================================================================================
# #------------------------------------------------------------------------------
# # Permute subtype labels to examine how well supervised model performs
#
# # nperm = 10000
#
# # make subtypes using z-scores computed from the mean and sd of the training set
# train_data = Dverbal_Discovery
# test_data = Dverbal_Replication
# mean2use = mean(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# sd2use = sd(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# tmp_train = make_subtype(data2use = train_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# mean2use = mean(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# sd2use = sd(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# tmp_test = make_subtype(data2use = test_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# acc = vector(length = nperm)
# for (iperm in 1:nperm){
# # set seed for reproducibility
# set.seed(iperm)
#
# sc_perm = sample(train_data[,vars2use[1]])
# rrb_perm = sample(train_data[,vars2use[2]])
# perm_mean2use = mean(sc_perm - rrb_perm)
# perm_sd2use = sd(sc_perm - rrb_perm)
# # perm_mean2use = mean(train_data[,vars2use[1]] - rrb_perm)
# # perm_sd2use = sd(train_data[,vars2use[1]] - rrb_perm)
# pred_labels = make_subtype(data2use = tmp_test,
# z_thresh = z_thresh,
# mean2use = perm_mean2use,
# sd2use = perm_sd2use)
# confmat = table(tmp_test$z_ds_group,pred_labels$z_ds_group)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/dim(pred_labels)[1]
#
# # compute model
# permuted_labels = sample(tmp_train$z_ds_group)
# mod2use = svm(x = tmp_train[,vars2use], y = permuted_labels)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc[iperm] = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
# #============================================================================
# } # for (iperm in 1:nperm)
#
# df2plot = data.frame(Accuracy = acc)
# p = ggplot(data = df2plot, aes(x = Accuracy)) + geom_histogram() + geom_vline(xintercept=true_accuracy)
# p
#
# # compute p-value
# pval = sum(c(true_accuracy,acc)>=true_accuracy)/(nperm+1)
# pval
#================================================================================
#================================================================================
#------------------------------------------------------------------------------
# Plot difference score Z subtypes in Discovery set
# NOTE(review): maxScores is not read by the plotting code below; it is kept
# because code elsewhere in the file may rely on it.
maxScores <- c(3, 4)

# Discovery - make plot with all individuals shown as lines
# One row per subject with the columns named in adi_total_vars2use (defined
# elsewhere; it must include dbaes_atotal and dbaes_btotal, which are read
# below), plus the subtype assigned to the training set.
df2use <- Dverbal_Discovery[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(tmp_train$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal

# Long format: two rows per subject (SC and RRB) so each subject is one line.
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))

# One facet per subtype; points joined by a faint line within subject.
# guides(colour = FALSE) is deprecated in ggplot2 (>= 3.3.4) and was applied
# twice; a single guides(colour = "none") removes the legend.
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p3_middle_top <- p
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Disc_jitterplot_z%s.pdf", as.character(z_thresh))), plot = p3_middle_top)
p
# Plot difference score Z subtypes in Replication set
# NOTE(review): maxScores is not read by the plotting code below; it is kept
# because code elsewhere in the file may rely on it.
maxScores <- c(3, 4)

# Replication - make plot with all individuals shown as lines
# One row per subject with the columns named in adi_total_vars2use (defined
# elsewhere; it must include dbaes_atotal and dbaes_btotal, which are read
# below), plus the subtype stored in tmp_test.
df2use <- Dverbal_Replication[, c("subjectkey", adi_total_vars2use)]
df2use$subgrp <- factor(tmp_test$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal

# Long format: two rows per subject (SC and RRB) so each subject is one line.
df4plot <- melt(df2use,
                id.vars = c("subjectkey", "subgrp"),
                measure.vars = c("SC", "RRB"))

# One facet per subtype; points joined by a faint line within subject.
# guides(colour = FALSE) is deprecated in ggplot2 (>= 3.3.4) and was applied
# twice; a single guides(colour = "none") removes the legend.
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) +
  facet_grid(. ~ subgrp) +
  geom_point(shape = 1) +
  geom_line(alpha = 0.2) +
  ylim(0, 1) +
  guides(colour = "none") +
  ylab("Percent Severity") +
  xlab("ADI-R subscale")
p4_middle_bottom <- p
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Rep_jitterplot_z%s.pdf", as.character(z_thresh))), plot = p4_middle_bottom)
p
#------------------------------------------------------------------------------
# Apply NDAR subtypes to EU-AIMS LEAP data
# make SC and RRB percentages since EU-AIMS data is specified as percentages
# Full NDAR verbal sample (source of the subtype norms) and EU-AIMS LEAP data.
Dverbal <- read.csv(file.path(datapath, "tidy_verbal.csv"))
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

# Keep ASD participants that have all seven domain percent-severity scores.
sc_items <- c("A1_pct_severity", "A2_pct_severity", "A3_pct_severity")
rrb_items <- c("B1_pct_severity", "B2_pct_severity", "B3_pct_severity", "B4_pct_severity")
mask1 <- euaims_data$Diagnosis == "ASD"
mask2 <- !complete.cases(euaims_data[, c(sc_items, rrb_items)])
euaims_data <- subset(euaims_data, (mask1 & !mask2))

# The NDAR SC/RRB columns are used exactly as read; the two self-assignments
# that used to sit here changed nothing and were removed.
# NOTE(review): presumably tidy_verbal.csv is already on the 0-1 scale - confirm.

# EU-AIMS SC / RRB scores: mean of the three A-domain and four B-domain
# percent-severity items, stored under the NDAR column names.
euaims_data[, vars2use[1]] <- (euaims_data$A1_pct_severity +
                                 euaims_data$A2_pct_severity +
                                 euaims_data$A3_pct_severity) / 3
euaims_data[, vars2use[2]] <- (euaims_data$B1_pct_severity +
                                 euaims_data$B2_pct_severity +
                                 euaims_data$B3_pct_severity +
                                 euaims_data$B4_pct_severity) / 4

# NDAR is the training set; its difference-score mean and SD (ignoring missing
# values) are the norms applied to both samples.
train_data <- Dverbal
test_data <- euaims_data
train_diff <- train_data[, vars2use[1]] - train_data[, vars2use[2]]
mean2use <- mean(train_diff, na.rm = TRUE)
sd2use <- sd(train_diff, na.rm = TRUE)
c(mean2use, sd2use)
## [1] -0.01045243 0.19482749
# Label both samples with the same norms (mean2use / sd2use, set just above
# from the NDAR training data): first NDAR itself, then the EU-AIMS test set.
tmp_train <- make_subtype(train_data, z_thresh, mean2use = mean2use, sd2use = sd2use)
tmp_test <- make_subtype(test_data, z_thresh, mean2use = mean2use, sd2use = sd2use)
#===========================================================================
#===========================================================================
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#
# tmp_test$svm_pred_labels = pred_labels
#
# # plot confusion matrix
# setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
# pheatmap(confmat/rowSums(confmat)*100, display_numbers = confmat, color = colorRampPalette(c('white','red'))(100), cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = fontSize, fontsize_row = fontSize, fontsize_col = fontSize,labels_row = c("RRB>SC","SC=RRB","SC>RRB"),labels_col = c("RRB>SC","SC=RRB","SC>RRB"),angle_col=90,
# breaks= seq(0,100, length=100))
# setHook("grid.newpage", NULL, "replace")
# grid::grid.text("Actual Labels", y=-0.07, gp=gpar(fontsize=fontSize))
# grid::grid.text("Predicted Labels", x=-0.07, rot=90, gp=gpar(fontsize=fontSize))
#===========================================================================
#===========================================================================
# scatterplot
# SC vs RRB scatterplots coloured by subtype: the NDAR training sample (p1)
# and EU-AIMS labelled with the NDAR-derived norms (p2).
# Axis limits are 0-1, matching the NDAR Discovery/Replication scatterplots.
# The previous 0-0.8 limits made ggplot2 drop every point above 0.8, and the
# EU-AIMS SC scores reach 0.82 in the descriptive statistics further down.
p1 <- ggplot(
  data = tmp_train,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("NDAR ALL")
p1
p2 <- ggplot(
  data = tmp_test,
  aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))
) +
  geom_point() +
  xlab("Social-Communication") +
  ylab("Restricted Repetitive Behaviors") +
  ylim(0, 1) +
  xlim(0, 1) +
  ggtitle("EU-AIMS with Groups from NDAR ALL")
p2
#===========================================================================
#===========================================================================
# p3 = ggplot(data = tmp_test, aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(pred_labels))) + geom_point() + xlab("SC") + ylab("RRB") + ylim(0,0.8) + xlim(0,0.8) + ggtitle("EU-AIMS")
# p5_bottom_right = p3 + guides(colour=FALSE)
# ggsave(filename = file.path(plotpath, sprintf("final_EUAIMS_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p5_bottom_right)
# p3
#===========================================================================
#===========================================================================
# # write out EU-AIMS LEAP data with subgroups defined by NDAR All
# Save the subtyped EU-AIMS data; the z threshold is embedded in the file name.
euaims_subtype_file <- file.path(
  datapath,
  sprintf("tidy_euaims_NDAR_subtypes_diffscore_z%s.csv", as.character(z_thresh))
)
write.csv(tmp_test, file = euaims_subtype_file)
#===========================================================================
#===========================================================================
# # Make final plot
# p_final = p1_top_left + p3_middle_top + plot_spacer() + p2_bottom_left + p4_middle_bottom + p5_bottom_right + plot_layout(nrow=3, ncol=3, widths = c(4,4,4), heights = c(8,8,8))
# ggsave(filename = file.path(plotpath, sprintf("final_NDAR_EUAIMS_subtypes_plot_z%s.pdf",as.character(z_thresh))), plot = p_final)
# p_final
#===========================================================================
#===========================================================================
#------------------------------------------------------------------------------
# Integrate subtypes with rest of EU-AIMS LEAP data
# Re-read the unfiltered EU-AIMS table so the TD participants are available.
euaims_data <- read.csv(file.path(datapath, "tidy_euaims.csv"))

# TD rows get missing severity scores and the subgroup label "TD".
# NOTE(review): select = 2:6 picks columns by position; the later rbind with
# asd_df only works if these are subid, age, Centre, Schedule and Diagnosis -
# confirm against tidy_euaims.csv.
td_df <- subset(euaims_data, euaims_data$Diagnosis == "TD", select = 2:6)
td_df$A_pct_severity <- NA_real_
td_df$B_pct_severity <- NA_real_
td_df$subgrp <- "TD"
#===========================================================================
#===========================================================================
# cols2use = c("subid","age","Centre","Schedule","Diagnosis","dbaes_atotal","dbaes_btotal","svm_pred_labels")
cols2use <- c("subid", "age", "Centre", "Schedule", "Diagnosis", "dbaes_atotal", "dbaes_btotal", "z_ds_group")
#===========================================================================
#===========================================================================
# ASD rows: rename the SC / RRB / subtype columns to match td_df.
tmp_asd <- tmp_test[, cols2use]
colnames(tmp_asd)[6:8] <- c("A_pct_severity", "B_pct_severity", "subgrp")
asd_df <- tmp_asd

# Preprocessing sheet; only rows whose notes column equals "ok" are kept.
# NOTE(review): absolute, user-specific path - the script only runs where this
# file exists at exactly this location.
fname <- "/Users/mlombardo/Dropbox/euaims/data/rsfmri_preproc/euaims_preproc.xlsx"
pp_data <- read_excel(fname)
mask <- pp_data$notes == "ok"
pp_data <- subset(pp_data, mask)

# Attach sex and restrict both groups to subjects present in the sheet, then
# stack them. (An earlier rbind of the unmerged frames was overwritten here
# before being used, so it has been dropped.)
asd_df <- merge(pp_data[, c("subid", "sex")], asd_df, by = "subid")
td_df <- merge(pp_data[, c("subid", "sex")], td_df, by = "subid")
all_data <- rbind(td_df, asd_df)

# Full table: preprocessing columns plus phenotype/subtype columns. merge()
# suffixes the shared age and sex columns; age is taken from the sheet (.x)
# and sex from all_data (.y).
data2write <- merge(pp_data, all_data, by = "subid")
data2write$age <- data2write$age.x
data2write$sex <- data2write$sex.y
# Subgroup counts by Schedule
print(table(data2write[["Schedule"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## A 5 43 32 78
## B 1 52 32 83
## C 4 35 24 59
## D 0 18 20 23
# Subgroup counts by Centre
print(table(data2write[["Centre"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## CAMBRIDGE 4 31 7 29
## KINGS_COLLEGE 5 56 45 78
## MANNHEIM 0 0 0 34
## NIJMEGEN 1 49 43 64
## UTRECHT 0 12 13 38
# Subgroup counts by sex
print(table(data2write[["sex"]], data2write[["subgrp"]]))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Female 5 39 27 88
## Male 5 109 81 155
#DSM-5 - find best split that balances participants across sites
# Evaluate the candidate seed(s) for splitting the ASD data; findBestSplit is
# defined elsewhere and its result is read here via $seed and $discrepancy.
a <- findBestSplit(asd_df, seed_range = 172342) # ,300001:500000))
## [1] 172342
# Keep every seed that attains the smallest non-missing discrepancy.
min_discrepancy <- min(a$discrepancy, na.rm = TRUE)
best_seeds <- a$seed[which(a$discrepancy == min_discrepancy)]
print(best_seeds)
## [1] 172342
# Split datasets -------------------------------------------------------------
rngSeed <- best_seeds[1]

# Split each Schedule's ASD rows with SplitDatasetsBySex, always using the
# same seed and processing the schedules in the order A, B, C, D.
schedule_splits <- list()
for (schedule_id in c("A", "B", "C", "D")) {
  dset2use <- subset(asd_df, asd_df$Schedule == schedule_id)
  tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
  schedule_splits[[schedule_id]] <- tmp_d
}

# Element 2 of each split is used as Discovery, element 1 as Replication.
A_Discovery <- schedule_splits[["A"]][[2]]
A_Replication <- schedule_splits[["A"]][[1]]
B_Discovery <- schedule_splits[["B"]][[2]]
B_Replication <- schedule_splits[["B"]][[1]]
C_Discovery <- schedule_splits[["C"]][[2]]
C_Replication <- schedule_splits[["C"]][[1]]
D_Discovery <- schedule_splits[["D"]][[2]]
D_Replication <- schedule_splits[["D"]][[1]]

# Stack the per-schedule halves into the two ASD datasets.
df_Disc <- rbind(A_Discovery, B_Discovery, C_Discovery, D_Discovery)
df_Rep <- rbind(A_Replication, B_Replication, C_Replication, D_Replication)
# Schedule x Centre counts in the Discovery half
a <- table(df_Disc$Schedule, df_Disc$Centre)
print(a)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 6 17 10 7
## B 9 16 14 3
## C 6 9 14 2
## D 0 10 10 0
# Schedule x Centre counts in the Replication half
b <- table(df_Rep$Schedule, df_Rep$Centre)
print(b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 7 18 10 5
## B 8 17 13 5
## C 6 10 13 3
## D 0 9 9 0
# Cell-wise difference between the two halves
site_diff <- a - b
print(site_diff)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A -1 -1 0 2
## B 1 -1 1 -2
## C 0 -1 1 -1
## D 0 1 1 0
# Total absolute imbalance across all Schedule x Centre cells
print(sum(rowSums(abs(site_diff))))
## [1] 14
# Tag each row of data2write with the ASD half its subid was assigned to;
# rows in neither half stay NA.
data2write$dataset <- NA
mask <- data2write$subid %in% df_Disc$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% df_Rep$subid
data2write$dataset[mask] <- "Replication"

# ASD rows of each half
in_discovery <- data2write$dataset == "Discovery"
in_replication <- data2write$dataset == "Replication"
is_asd <- data2write$Diagnosis == "ASD"
asd_Disc <- subset(data2write, in_discovery & is_asd)
asd_Rep <- subset(data2write, in_replication & is_asd)
# # find which seed gives best TD age-match -------------------------------------
# seeds = 1:1000
# pvals = data.frame(matrix(nrow = length(seeds), ncol = 2))
# for (i in 1:length(seeds)) {
# res = findTDAgeMatch(data2write, seed_range = c(seeds[i],seeds[i]))
# td_Disc_matched = res[[2]]
# td_Rep_matched = res[[1]]
# tres = t.test(td_Disc_matched$age, asd_Disc$age)
# pvals[i,1] = tres$p.value
# tres = t.test(td_Rep_matched$age, asd_Rep$age)
# pvals[i,2] = tres$p.value
# #print(i)
# }
# a = sort.int(pvals[,1], decreasing = TRUE, index.return = TRUE)
# b=pvals[a$ix,]
# Run the TD matching (findTDAgeMatch, defined elsewhere) with a single fixed
# seed, passed as a degenerate range.
seed2use <- 929
res <- findTDAgeMatch(data2write, seed_range = rep(seed2use, 2))

# Element 2 is used as the Discovery TD set, element 1 as the Replication set.
td_Disc_matched <- res[[2]]
td_Rep_matched <- res[[1]]

# Tag the matched TD subjects with their dataset.
mask <- data2write$subid %in% td_Disc_matched$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% td_Rep_matched$subid
data2write$dataset[mask] <- "Replication"

# Subgroup counts per dataset
print(table(data2write$dataset, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Discovery 8 70 55 121
## Replication 2 78 53 122
# Write the merged table to the project root (via here()); the z threshold is
# embedded in the file name.
out_name <- sprintf("asd_subgrp_data_rsfmri_ALL_DSM5_diffzscoreGrps_z%s.csv", as.character(z_thresh))
fname2save <- here(out_name)
write.csv(data2write, file = fname2save)
#------------------------------------------------------------------------------
# Descriptive stats
# Measures summarised below; the same list feeds both column selections.
measure_cols <- c(
  "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore", "SRS_tscore_self", "RBS_total", "SSP_total",
  "vabsdscoresc_dss", "vabsdscoresd_dss", "vabsdscoress_dss", "vabsabcabc_standard"
)
df2use <- data2write[, c("subid", "Diagnosis", "dataset", "Centre", "meanFD", "age", "sex", "subgrp", measure_cols)]

# Every cell equal to 999 or 777 becomes NA.
# NOTE(review): presumably these are missing-data codes - confirm.
mask <- df2use == 999 | df2use == 777
df2use[mask] <- NA

# NOTE(review): dividing by 365 suggests age is stored in days - confirm.
df2use$age <- df2use$age / 365

# Describe each measure within every subgrp x dataset cell.
cols2use <- c("dataset", "subgrp", "age", "meanFD", measure_cols)
res <- describeBy(x = df2use[, cols2use], group = c("subgrp", "dataset"))
res
##
## Descriptive statistics by group
## subgrp: RRB_over_SC
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 8 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 8 NaN NA NA NaN NA Inf -Inf
## age 3 8 17.53 7.05 21.73 17.53 2.56 7.89 23.88
## meanFD 4 8 0.27 0.36 0.18 0.27 0.07 0.06 1.14
## viq_all 5 8 103.86 13.94 107.50 103.86 12.60 78.00 123.85
## piq_all 6 8 101.37 9.62 99.50 101.37 9.64 87.00 114.00
## fsiq4_all 7 8 102.75 12.37 103.00 102.75 10.38 80.00 118.01
## A_pct_severity 8 8 0.20 0.11 0.19 0.20 0.13 0.05 0.35
## B_pct_severity 9 8 0.47 0.08 0.47 0.47 0.09 0.36 0.59
## ADI_social_total 10 8 17.50 7.80 20.50 17.50 7.41 5.00 26.00
## ADI_communication_total 11 8 16.25 6.30 17.00 16.25 7.41 6.00 24.00
## ADI_RRB_total 12 8 8.50 1.41 8.50 8.50 2.22 7.00 10.00
## ados_2_SA_CSS 13 8 3.75 2.87 3.00 3.75 2.22 1.00 9.00
## ados_2_RRB_CSS 14 8 3.88 3.98 1.00 3.88 0.00 1.00 9.00
## SRS_tscore 15 4 72.25 13.72 70.50 72.25 12.60 58.00 90.00
## SRS_tscore_self 16 4 59.00 7.12 61.00 59.00 4.45 49.00 65.00
## RBS_total 17 4 18.00 8.83 17.50 18.00 8.90 8.00 29.00
## SSP_total 18 3 142.00 24.06 140.00 142.00 31.13 119.00 167.00
## vabsdscoresc_dss 19 5 84.20 18.63 77.00 84.20 14.83 67.00 115.00
## vabsdscoresd_dss 20 5 69.00 9.46 74.00 69.00 7.41 57.00 79.00
## vabsdscoress_dss 21 5 70.40 8.99 71.00 70.40 2.97 57.00 82.00
## vabsabcabc_standard 22 5 72.80 10.76 72.00 72.80 7.41 59.00 88.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 16.00 -0.41 -1.98 2.49
## meanFD 1.09 1.77 1.50 0.13
## viq_all 45.85 -0.43 -0.93 4.93
## piq_all 27.00 0.09 -1.56 3.40
## fsiq4_all 38.01 -0.40 -1.08 4.37
## A_pct_severity 0.30 0.01 -1.94 0.04
## B_pct_severity 0.23 0.05 -1.67 0.03
## ADI_social_total 21.00 -0.41 -1.67 2.76
## ADI_communication_total 18.00 -0.26 -1.52 2.23
## ADI_RRB_total 3.00 0.00 -2.05 0.50
## ados_2_SA_CSS 8.00 0.70 -1.16 1.01
## ados_2_RRB_CSS 8.00 0.44 -2.00 1.41
## SRS_tscore 32.00 0.24 -2.00 6.86
## SRS_tscore_self 16.00 -0.50 -1.88 3.56
## RBS_total 21.00 0.11 -1.98 4.42
## SSP_total 48.00 0.08 -2.33 13.89
## vabsdscoresc_dss 48.00 0.71 -1.35 8.33
## vabsdscoresd_dss 22.00 -0.23 -2.10 4.23
## vabsdscoress_dss 25.00 -0.23 -1.46 4.02
## vabsabcabc_standard 29.00 0.14 -1.65 4.81
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 70 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 70 NaN NA NA NaN NA Inf -Inf
## age 3 70 16.17 5.67 14.92 15.91 4.95 7.08 30.28
## meanFD 4 70 0.29 0.47 0.19 0.22 0.13 0.03 3.95
## viq_all 5 69 96.91 18.86 97.35 96.93 21.72 61.00 136.00
## piq_all 6 69 99.63 22.05 102.00 99.62 23.62 58.00 150.00
## fsiq4_all 7 70 98.42 19.14 102.25 98.79 20.07 60.00 143.00
## A_pct_severity 8 70 0.31 0.14 0.31 0.31 0.15 0.00 0.63
## B_pct_severity 9 70 0.30 0.15 0.29 0.29 0.16 0.01 0.69
## ADI_social_total 10 70 16.94 6.97 18.00 17.30 8.90 2.00 27.00
## ADI_communication_total 11 70 13.73 5.95 14.00 13.79 6.67 0.00 26.00
## ADI_RRB_total 12 70 5.20 2.55 5.00 5.21 2.97 0.00 12.00
## ados_2_SA_CSS 13 69 6.30 2.62 7.00 6.40 2.97 1.00 10.00
## ados_2_RRB_CSS 14 69 5.01 2.78 5.00 5.00 2.97 1.00 10.00
## SRS_tscore 15 61 72.70 12.49 74.00 73.12 14.83 45.00 95.00
## SRS_tscore_self 16 33 63.09 13.23 64.00 62.04 13.34 43.00 94.00
## RBS_total 17 58 19.72 16.76 17.00 17.38 14.08 0.00 90.00
## SSP_total 18 39 132.74 31.42 136.00 133.39 34.10 53.00 189.00
## vabsdscoresc_dss 19 68 73.04 17.83 75.00 73.66 11.86 21.00 122.00
## vabsdscoresd_dss 20 67 73.16 16.69 73.00 73.00 11.86 25.00 131.00
## vabsdscoress_dss 21 68 70.47 15.99 73.00 71.54 13.34 20.00 104.00
## vabsabcabc_standard 22 67 70.84 13.34 72.00 71.13 10.38 20.00 103.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.20 0.46 -0.58 0.68
## meanFD 3.92 6.60 47.90 0.06
## viq_all 75.00 -0.11 -0.84 2.27
## piq_all 92.00 -0.09 -0.62 2.65
## fsiq4_all 83.00 -0.16 -0.77 2.29
## A_pct_severity 0.63 0.00 -0.65 0.02
## B_pct_severity 0.68 0.42 -0.31 0.02
## ADI_social_total 25.00 -0.36 -1.02 0.83
## ADI_communication_total 26.00 -0.07 -0.79 0.71
## ADI_RRB_total 12.00 0.05 -0.42 0.30
## ados_2_SA_CSS 9.00 -0.39 -1.06 0.32
## ados_2_RRB_CSS 9.00 -0.31 -1.09 0.34
## SRS_tscore 50.00 -0.23 -0.93 1.60
## SRS_tscore_self 51.00 0.47 -0.53 2.30
## RBS_total 90.00 1.70 3.93 2.20
## SSP_total 136.00 -0.26 -0.47 5.03
## vabsdscoresc_dss 101.00 -0.39 1.13 2.16
## vabsdscoresd_dss 106.00 0.20 1.95 2.04
## vabsdscoress_dss 84.00 -0.75 0.61 1.94
## vabsabcabc_standard 83.00 -0.57 2.42 1.63
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 55 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 55 NaN NA NA NaN NA Inf -Inf
## age 3 55 16.43 5.12 15.91 16.20 5.09 7.78 29.40
## meanFD 4 55 0.23 0.23 0.15 0.18 0.10 0.04 1.08
## viq_all 5 54 98.07 18.80 100.00 97.54 19.26 65.55 142.00
## piq_all 6 54 99.26 20.81 102.50 100.66 19.27 52.43 136.38
## fsiq4_all 7 55 98.84 18.36 101.36 99.53 18.73 59.00 128.30
## A_pct_severity 8 55 0.44 0.14 0.45 0.44 0.12 0.19 0.82
## B_pct_severity 9 55 0.16 0.09 0.15 0.15 0.11 0.00 0.33
## ADI_social_total 10 55 18.02 6.38 18.00 18.40 7.41 3.00 28.00
## ADI_communication_total 11 55 14.58 4.88 15.00 14.78 4.45 2.00 24.00
## ADI_RRB_total 12 55 2.93 1.94 3.00 2.82 1.48 0.00 8.00
## ados_2_SA_CSS 13 53 6.30 2.50 7.00 6.44 2.97 1.00 10.00
## ados_2_RRB_CSS 14 53 4.66 2.79 5.00 4.56 2.97 1.00 10.00
## SRS_tscore 15 49 72.22 11.09 74.00 72.73 10.38 44.00 90.00
## SRS_tscore_self 16 23 61.87 9.45 61.00 61.21 8.90 42.00 89.00
## RBS_total 17 48 14.04 12.97 11.50 12.50 12.60 0.00 54.00
## SSP_total 18 37 141.51 28.32 141.00 142.26 35.58 78.00 186.00
## vabsdscoresc_dss 19 50 70.52 16.34 71.50 72.10 11.12 21.00 103.00
## vabsdscoresd_dss 20 50 69.82 16.53 70.00 69.92 11.86 17.00 112.00
## vabsdscoress_dss 21 50 67.16 15.86 68.00 68.85 14.08 20.00 95.00
## vabsabcabc_standard 22 50 66.92 15.46 70.00 68.47 9.64 6.00 96.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.62 0.42 -0.35 0.69
## meanFD 1.04 2.29 4.80 0.03
## viq_all 76.45 0.25 -0.44 2.56
## piq_all 83.96 -0.56 -0.45 2.83
## fsiq4_all 69.30 -0.35 -0.94 2.48
## A_pct_severity 0.63 0.26 -0.18 0.02
## B_pct_severity 0.33 0.21 -1.03 0.01
## ADI_social_total 25.00 -0.44 -0.66 0.86
## ADI_communication_total 22.00 -0.38 -0.29 0.66
## ADI_RRB_total 8.00 0.62 -0.09 0.26
## ados_2_SA_CSS 9.00 -0.44 -0.86 0.34
## ados_2_RRB_CSS 9.00 -0.14 -1.16 0.38
## SRS_tscore 46.00 -0.43 -0.19 1.58
## SRS_tscore_self 47.00 0.68 1.21 1.97
## RBS_total 54.00 1.20 1.27 1.87
## SSP_total 108.00 -0.19 -1.09 4.66
## vabsdscoresc_dss 82.00 -1.13 2.23 2.31
## vabsdscoresd_dss 95.00 -0.33 1.48 2.34
## vabsdscoress_dss 75.00 -1.06 1.42 2.24
## vabsabcabc_standard 90.00 -1.70 4.59 2.19
## ------------------------------------------------------------
## subgrp: TD
## dataset: Discovery
## vars n mean sd median trimmed mad min
## dataset* 1 121 NaN NA NA NaN NA Inf
## subgrp* 2 121 NaN NA NA NaN NA Inf
## age 3 121 16.83 5.23 16.65 16.73 5.69 7.22
## meanFD 4 121 0.18 0.15 0.13 0.15 0.07 0.03
## viq_all 5 119 104.52 19.70 105.00 105.03 19.27 46.00
## piq_all 6 119 106.10 19.47 107.00 107.53 17.79 49.00
## fsiq4_all 7 119 105.72 18.33 108.18 106.99 16.58 53.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf
## SRS_tscore 15 68 47.84 9.40 45.00 46.32 5.19 37.00
## SRS_tscore_self 16 71 46.69 4.85 46.00 46.26 4.45 39.00
## RBS_total 17 68 2.15 4.74 0.00 0.95 0.00 0.00
## SSP_total 18 59 177.86 12.71 182.00 179.78 8.90 122.00
## vabsdscoresc_dss 19 34 91.97 25.44 99.50 93.50 21.50 21.00
## vabsdscoresd_dss 20 34 90.74 20.28 98.50 92.25 17.05 33.00
## vabsdscoress_dss 21 34 96.21 23.67 102.50 98.57 21.50 33.00
## vabsabcabc_standard 22 34 92.06 23.04 99.50 93.89 15.57 25.00
## max range skew kurtosis se
## dataset* -Inf -Inf NA NA NA
## subgrp* -Inf -Inf NA NA NA
## age 29.84 22.62 0.17 -0.51 0.48
## meanFD 0.85 0.82 2.28 5.65 0.01
## viq_all 160.00 114.00 -0.24 0.38 1.81
## piq_all 147.00 98.00 -0.69 0.32 1.79
## fsiq4_all 142.00 89.00 -0.69 0.58 1.68
## A_pct_severity -Inf -Inf NA NA NA
## B_pct_severity -Inf -Inf NA NA NA
## ADI_social_total -Inf -Inf NA NA NA
## ADI_communication_total -Inf -Inf NA NA NA
## ADI_RRB_total -Inf -Inf NA NA NA
## ados_2_SA_CSS -Inf -Inf NA NA NA
## ados_2_RRB_CSS -Inf -Inf NA NA NA
## SRS_tscore 76.00 39.00 1.54 1.44 1.14
## SRS_tscore_self 63.00 24.00 0.93 1.10 0.58
## RBS_total 27.00 27.00 3.13 10.87 0.57
## SSP_total 190.00 68.00 -1.90 4.85 1.66
## vabsdscoresc_dss 138.00 117.00 -0.71 0.36 4.36
## vabsdscoresd_dss 121.00 88.00 -0.80 0.07 3.48
## vabsdscoress_dss 129.00 96.00 -0.83 -0.31 4.06
## vabsabcabc_standard 127.00 102.00 -0.90 0.30 3.95
## ------------------------------------------------------------
## subgrp: RRB_over_SC
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 2 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 2 NaN NA NA NaN NA Inf -Inf
## age 3 2 12.03 0.82 12.03 12.03 0.86 11.45 12.61
## meanFD 4 2 0.29 0.13 0.29 0.29 0.13 0.20 0.38
## viq_all 5 2 121.00 31.11 121.00 121.00 32.62 99.00 143.00
## piq_all 6 2 118.50 41.72 118.50 118.50 43.74 89.00 148.00
## fsiq4_all 7 2 120.50 38.89 120.50 120.50 40.77 93.00 148.00
## A_pct_severity 8 2 0.16 0.00 0.16 0.16 0.00 0.15 0.16
## B_pct_severity 9 2 0.37 0.04 0.37 0.37 0.04 0.35 0.40
## ADI_social_total 10 2 14.50 2.12 14.50 14.50 2.22 13.00 16.00
## ADI_communication_total 11 2 7.50 2.12 7.50 7.50 2.22 6.00 9.00
## ADI_RRB_total 12 2 6.50 0.71 6.50 6.50 0.74 6.00 7.00
## ados_2_SA_CSS 13 2 6.00 1.41 6.00 6.00 1.48 5.00 7.00
## ados_2_RRB_CSS 14 2 4.00 4.24 4.00 4.00 4.45 1.00 7.00
## SRS_tscore 15 2 66.00 8.49 66.00 66.00 8.90 60.00 72.00
## SRS_tscore_self 16 0 NaN NA NA NaN NA Inf -Inf
## RBS_total 17 2 15.50 3.54 15.50 15.50 3.71 13.00 18.00
## SSP_total 18 2 146.00 9.90 146.00 146.00 10.38 139.00 153.00
## vabsdscoresc_dss 19 2 86.50 17.68 86.50 86.50 18.53 74.00 99.00
## vabsdscoresd_dss 20 2 71.00 4.24 71.00 71.00 4.45 68.00 74.00
## vabsdscoress_dss 21 2 85.50 13.44 85.50 85.50 14.08 76.00 95.00
## vabsabcabc_standard 22 2 79.00 2.83 79.00 79.00 2.97 77.00 81.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 1.16 0 -2.75 0.58
## meanFD 0.18 0 -2.75 0.09
## viq_all 44.00 0 -2.75 22.00
## piq_all 59.00 0 -2.75 29.50
## fsiq4_all 55.00 0 -2.75 27.50
## A_pct_severity 0.01 0 -2.75 0.00
## B_pct_severity 0.06 0 -2.75 0.03
## ADI_social_total 3.00 0 -2.75 1.50
## ADI_communication_total 3.00 0 -2.75 1.50
## ADI_RRB_total 1.00 0 -2.75 0.50
## ados_2_SA_CSS 2.00 0 -2.75 1.00
## ados_2_RRB_CSS 6.00 0 -2.75 3.00
## SRS_tscore 12.00 0 -2.75 6.00
## SRS_tscore_self -Inf NA NA NA
## RBS_total 5.00 0 -2.75 2.50
## SSP_total 14.00 0 -2.75 7.00
## vabsdscoresc_dss 25.00 0 -2.75 12.50
## vabsdscoresd_dss 6.00 0 -2.75 3.00
## vabsdscoress_dss 19.00 0 -2.75 9.50
## vabsabcabc_standard 4.00 0 -2.75 2.00
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 78 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 78 NaN NA NA NaN NA Inf -Inf
## age 3 78 16.70 5.64 16.42 16.43 6.04 7.12 30.15
## meanFD 4 78 0.25 0.28 0.17 0.19 0.10 0.05 1.60
## viq_all 5 77 102.02 16.24 102.73 102.66 17.39 62.90 133.00
## piq_all 6 77 104.03 18.25 106.00 105.17 19.27 52.00 134.00
## fsiq4_all 7 77 103.31 16.26 106.00 104.03 16.98 64.00 131.00
## A_pct_severity 8 78 0.27 0.13 0.26 0.27 0.12 0.04 0.65
## B_pct_severity 9 78 0.24 0.13 0.22 0.23 0.12 0.00 0.67
## ADI_social_total 10 78 14.97 6.05 15.50 15.20 6.67 1.00 27.00
## ADI_communication_total 11 78 11.90 5.47 11.00 11.89 5.93 0.00 24.00
## ADI_RRB_total 12 78 3.90 2.24 4.00 3.83 2.22 0.00 9.00
## ados_2_SA_CSS 13 76 5.58 2.52 6.00 5.63 2.97 1.00 10.00
## ados_2_RRB_CSS 14 76 4.91 2.55 5.00 4.89 1.48 1.00 10.00
## SRS_tscore 15 71 66.54 11.62 67.00 66.33 13.34 43.00 90.00
## SRS_tscore_self 16 39 62.00 7.93 62.00 61.76 5.93 46.00 84.00
## RBS_total 17 68 12.82 11.02 10.00 11.52 9.64 0.00 52.00
## SSP_total 18 46 143.24 26.63 143.00 145.08 34.10 69.00 184.00
## vabsdscoresc_dss 19 71 82.11 13.83 79.00 81.18 11.86 50.00 122.00
## vabsdscoresd_dss 20 70 78.60 15.55 77.50 77.98 13.34 38.00 119.00
## vabsdscoress_dss 21 71 76.11 15.38 77.00 76.82 11.86 28.00 112.00
## vabsabcabc_standard 22 70 77.34 13.11 77.00 77.02 8.90 39.00 117.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.03 0.37 -0.54 0.64
## meanFD 1.55 3.39 12.54 0.03
## viq_all 70.10 -0.31 -0.77 1.85
## piq_all 82.00 -0.59 -0.02 2.08
## fsiq4_all 67.00 -0.41 -0.54 1.85
## A_pct_severity 0.61 0.44 0.19 0.01
## B_pct_severity 0.67 0.66 0.55 0.01
## ADI_social_total 26.00 -0.31 -0.68 0.68
## ADI_communication_total 24.00 0.04 -0.68 0.62
## ADI_RRB_total 9.00 0.28 -0.50 0.25
## ados_2_SA_CSS 9.00 -0.19 -0.88 0.29
## ados_2_RRB_CSS 9.00 -0.37 -0.81 0.29
## SRS_tscore 47.00 0.10 -0.85 1.38
## SRS_tscore_self 38.00 0.38 0.46 1.27
## RBS_total 52.00 1.29 1.74 1.34
## SSP_total 115.00 -0.63 -0.36 3.93
## vabsdscoresc_dss 72.00 0.58 0.29 1.64
## vabsdscoresd_dss 81.00 0.26 0.23 1.86
## vabsdscoress_dss 84.00 -0.61 1.16 1.83
## vabsabcabc_standard 78.00 0.21 1.00 1.57
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 53 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 53 NaN NA NA NaN NA Inf -Inf
## age 3 53 16.13 5.32 15.78 15.94 6.31 7.48 29.23
## meanFD 4 53 0.23 0.21 0.16 0.20 0.11 0.04 1.31
## viq_all 5 50 94.03 19.85 97.64 95.13 19.95 50.91 130.00
## piq_all 6 52 96.80 21.98 101.50 97.99 19.98 44.03 138.00
## fsiq4_all 7 51 96.21 20.47 102.00 96.81 20.76 59.00 139.00
## A_pct_severity 8 53 0.50 0.13 0.51 0.50 0.14 0.21 0.75
## B_pct_severity 9 53 0.20 0.12 0.21 0.19 0.13 0.00 0.47
## ADI_social_total 10 53 19.08 5.83 20.00 19.37 5.93 6.00 29.00
## ADI_communication_total 11 53 15.30 4.68 16.00 15.51 4.45 4.00 24.00
## ADI_RRB_total 12 53 3.91 2.60 4.00 3.77 2.97 0.00 10.00
## ados_2_SA_CSS 13 50 6.28 2.92 6.00 6.40 4.45 1.00 10.00
## ados_2_RRB_CSS 14 50 4.50 2.77 5.00 4.38 2.97 1.00 9.00
## SRS_tscore 15 45 76.60 10.42 80.00 77.38 11.86 51.00 90.00
## SRS_tscore_self 16 24 62.46 10.53 61.50 62.40 11.12 40.00 84.00
## RBS_total 17 45 21.58 15.62 18.00 20.05 11.86 1.00 73.00
## SSP_total 18 35 134.34 24.32 138.00 134.00 26.69 91.00 181.00
## vabsdscoresc_dss 19 48 70.56 15.00 70.50 71.30 10.38 21.00 102.00
## vabsdscoresd_dss 20 48 68.79 15.51 68.00 68.38 14.08 42.00 118.00
## vabsdscoress_dss 21 48 63.77 15.35 64.00 63.92 14.08 23.00 100.00
## vabsabcabc_standard 22 48 65.88 13.57 66.50 66.33 8.90 28.00 94.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.75 0.33 -0.81 0.73
## meanFD 1.26 3.03 11.95 0.03
## viq_all 79.09 -0.44 -0.62 2.81
## piq_all 93.97 -0.46 -0.71 3.05
## fsiq4_all 80.00 -0.29 -0.93 2.87
## A_pct_severity 0.54 -0.14 -0.89 0.02
## B_pct_severity 0.47 0.28 -0.81 0.02
## ADI_social_total 23.00 -0.47 -0.63 0.80
## ADI_communication_total 20.00 -0.38 -0.64 0.64
## ADI_RRB_total 10.00 0.45 -0.71 0.36
## ados_2_SA_CSS 9.00 -0.18 -1.32 0.41
## ados_2_RRB_CSS 8.00 -0.12 -1.32 0.39
## SRS_tscore 39.00 -0.57 -0.68 1.55
## SRS_tscore_self 44.00 0.07 -0.40 2.15
## RBS_total 72.00 1.05 0.80 2.33
## SSP_total 90.00 -0.07 -0.90 4.11
## vabsdscoresc_dss 81.00 -0.82 1.92 2.17
## vabsdscoresd_dss 76.00 0.58 0.44 2.24
## vabsdscoress_dss 77.00 -0.16 0.14 2.22
## vabsabcabc_standard 66.00 -0.43 0.86 1.96
## ------------------------------------------------------------
## subgrp: TD
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 122 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 122 NaN NA NA NaN NA Inf -Inf
## age 3 122 16.86 6.07 16.34 16.58 7.59 6.89 29.72
## meanFD 4 122 0.23 0.46 0.14 0.15 0.07 0.04 4.60
## viq_all 5 122 104.02 17.58 108.18 105.64 12.13 45.00 140.00
## piq_all 6 122 104.64 18.41 108.96 106.56 14.08 49.00 139.00
## fsiq4_all 7 122 104.94 17.14 108.09 107.29 11.86 50.00 134.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf -Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf -Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf -Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf -Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf -Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf -Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf -Inf
## SRS_tscore 15 65 47.23 9.34 44.00 45.66 4.45 37.00 90.00
## SRS_tscore_self 16 61 48.44 6.84 47.00 47.63 5.93 39.00 69.00
## RBS_total 17 63 3.08 11.54 0.00 0.86 0.00 0.00 89.00
## SSP_total 18 54 174.93 19.38 182.00 178.41 6.67 75.00 190.00
## vabsdscoresc_dss 19 39 92.74 25.45 96.00 95.82 20.76 21.00 125.00
## vabsdscoresd_dss 20 39 91.10 22.65 97.00 93.91 14.83 27.00 122.00
## vabsdscoress_dss 21 39 98.90 27.04 103.00 102.12 17.79 20.00 132.00
## vabsabcabc_standard 22 38 93.00 25.39 100.00 96.44 15.57 20.00 126.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 22.83 0.33 -0.97 0.55
## meanFD 4.56 7.54 64.83 0.04
## viq_all 95.00 -0.99 1.51 1.59
## piq_all 90.00 -0.93 0.67 1.67
## fsiq4_all 84.00 -1.28 1.67 1.55
## A_pct_severity -Inf NA NA NA
## B_pct_severity -Inf NA NA NA
## ADI_social_total -Inf NA NA NA
## ADI_communication_total -Inf NA NA NA
## ADI_RRB_total -Inf NA NA NA
## ados_2_SA_CSS -Inf NA NA NA
## ados_2_RRB_CSS -Inf NA NA NA
## SRS_tscore 53.00 2.14 5.82 1.16
## SRS_tscore_self 30.00 1.05 0.48 0.88
## RBS_total 89.00 6.60 45.89 1.45
## SSP_total 115.00 -2.88 10.92 2.64
## vabsdscoresc_dss 104.00 -1.21 1.00 4.08
## vabsdscoresd_dss 95.00 -1.34 1.26 3.63
## vabsdscoress_dss 112.00 -1.26 1.10 4.33
## vabsabcabc_standard 106.00 -1.42 1.40 4.12
#------------------------------------------------------------------------------
# Subgroup-by-sex contingency table, Discovery set only
data2use <- subset(data2write, dataset == "Discovery")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 4 4
## SC_equal_RRB 17 53
## SC_over_RRB 14 41
## TD 41 80
# Pearson chi-squared test of independence between subgroup and sex
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.028, df = 3, p-value = 0.2585
# Subgroup-by-sex contingency table, Replication set only
data2use <- subset(data2write, dataset == "Replication")
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 1 1
## SC_equal_RRB 22 56
## SC_over_RRB 13 40
## TD 47 75
# Pearson chi-squared test of independence between subgroup and sex
cs_res <- chisq.test(x = data2use$subgrp, y = data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.4851, df = 3, p-value = 0.2136
#------------------------------------------------------------------------------
# Dependent variables to model, in the order they appear in the results table
vars2analyze <- c(
  "age", "meanFD", "viq_all", "piq_all", "fsiq4_all",
  "A_pct_severity", "B_pct_severity",
  "ADI_social_total", "ADI_communication_total", "ADI_RRB_total",
  "ados_2_SA_CSS", "ados_2_RRB_CSS",
  "SRS_tscore_self", "RBS_total", "SSP_total",
  "vabsdscoress_dss", "vabsdscoresd_dss", "vabsdscoresc_dss", "vabsabcabc_standard"
)

# Axis labels for the plots; position i labels vars2analyze[i]
vnames <- c(
  "Age", "Mean FD", "VIQ", "PIQ", "FIQ",
  "ADI-R SC", "ADI-R RRB",
  "ADI-R Social", "ADI-R Communication", "ADI-R RRB",
  "ADOS SA CSS", "ADOS RRB CSS",
  "SRS", "RBS", "SSP",
  "Vineland Socialization", "Vineland Daily Living Skills",
  "Vineland Communication", "Vineland ABC"
)

# Result columns: omnibus F/p per dataset, then F/t/p/effect size for the
# SC_equal_RRB vs SC_over_RRB contrast per dataset, then the replication BF
cnames <- c(
  paste0("All_", rep(c("Disc", "Rep"), each = 2), c(".fstat", ".pval")),
  paste0("SCequalRRB_vs_SCoverRRB_", rep(c("Disc", "Rep"), each = 4),
         c(".fstat", ".tstat", ".pval", ".es")),
  "SCequalRRB_vs_SCoverRRB.repBF"
)

# Empty (all-NA) results table: one row per variable, one column per statistic
output_res <- data.frame(matrix(nrow = length(vars2analyze), ncol = length(cnames)))
colnames(output_res) <- cnames
rownames(output_res) <- vars2analyze
output_res$varNames <- vars2analyze
# For each variable in vars2analyze:
#   1. omnibus subgroup effect (all subgroups) in Discovery and in Replication,
#   2. SC_equal_RRB vs SC_over_RRB contrast in Discovery and in Replication
#      (F, t, p from the mixed model; effect size from cohens_d()),
#   3. replication Bayes factor for that contrast,
#   4. jitter + boxplot figure per dataset.
# All models are nlme::lme fits with subgrp as fixed effect and a random
# intercept for Centre; rows with missing values are dropped (na.omit).
# Results are written into the matching row of output_res.
for (ivar in seq_along(vars2analyze)) {
  y_var <- vars2analyze[ivar]
  # print(y_var)

  # The same fixed/random formulas are used by all four fits of this variable
  fx_form <- as.formula(sprintf("%s ~ %s", y_var, "subgrp"))
  rx_form <- as.formula(sprintf("~ 1|%s", "Centre"))

  #----------------------------------------------------------------------------
  # Discovery: omnibus test across all subgroups
  df4mod <- subset(df2use, df2use$dataset == "Discovery")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "All_Disc.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "All_Disc.pval"] <- res["subgrp", "p-value"]

  #----------------------------------------------------------------------------
  # Replication: omnibus test across all subgroups
  df4mod <- subset(df2use, df2use$dataset == "Replication")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "All_Rep.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "All_Rep.pval"] <- res["subgrp", "p-value"]

  #----------------------------------------------------------------------------
  # Discovery: SC_equal_RRB vs SC_over_RRB only (TD and RRB_over_SC removed)
  df4mod <- subset(df2use, df2use$dataset == "Discovery" & !is.element(df2use$subgrp, c("TD", "RRB_over_SC")))
  n1 <- sum(df4mod$subgrp == "SC_equal_RRB")
  m1 <- sum(df4mod$subgrp == "SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.pval"] <- res["subgrp", "p-value"]
  res <- summary(mod2use)
  # Row 2 of the fixed-effects table is taken as the subgroup contrast
  # (row 1 being the intercept)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.tstat"] <- res$tTable[2, "t-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.es"] <- cohens_d(
    df4mod[df4mod$subgrp == "SC_equal_RRB", y_var],
    df4mod[df4mod$subgrp == "SC_over_RRB", y_var]
  )

  #----------------------------------------------------------------------------
  # Replication: SC_equal_RRB vs SC_over_RRB only (TD and RRB_over_SC removed)
  df4mod <- subset(df2use, df2use$dataset == "Replication" & !is.element(df2use$subgrp, c("TD", "RRB_over_SC")))
  n2 <- sum(df4mod$subgrp == "SC_equal_RRB")
  m2 <- sum(df4mod$subgrp == "SC_over_RRB")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  res <- anova(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.fstat"] <- res["subgrp", "F-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.pval"] <- res["subgrp", "p-value"]
  res <- summary(mod2use)
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.tstat"] <- res$tTable[2, "t-value"]
  output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.es"] <- cohens_d(
    df4mod[df4mod$subgrp == "SC_equal_RRB", y_var],
    df4mod[df4mod$subgrp == "SC_over_RRB", y_var]
  )

  #----------------------------------------------------------------------------
  # Replication Bayes Factor
  # tobs is the Discovery t-statistic and trep the Replication t-statistic.
  # (Earlier versions passed the Discovery t-statistic for both, so any repBF
  # values printed from a previous run must be regenerated.)
  # NOTE(review): n1/m1 are the two Discovery group sizes and n2/m2 the two
  # Replication group sizes; confirm this matches the argument convention
  # BFSALL expects for n1, n2, m1, m2.
  res_bf <- BFSALL(tobs = output_res[y_var, "SCequalRRB_vs_SCoverRRB_Disc.tstat"],
                   trep = output_res[y_var, "SCequalRRB_vs_SCoverRRB_Rep.tstat"],
                   n1 = n1,
                   n2 = n2,
                   m1 = m1,
                   m2 = m2,
                   sample = 2,
                   Type = 'ALL')
  output_res[y_var, "SCequalRRB_vs_SCoverRRB.repBF"] <- res_bf["Replication BF", "Replication 1"]

  #----------------------------------------------------------------------------
  # make a plot (RRB_over_SC is left out of the figure)
  colors2use <- get_ggColorHue(3)
  df4plot <- subset(df2use, df2use$subgrp != "RRB_over_SC")
  p <- ggplot(data = df4plot, aes_string(x = "subgrp", y = y_var, colour = "subgrp")) + facet_grid(. ~ dataset)
  p <- p + geom_jitter() + geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA)
  # guides(colour = "none") hides the legend; FALSE is deprecated in ggplot2
  p <- p + ylab(vnames[ivar]) + xlab("Group") +
    scale_x_discrete(labels = c("SC_equal_RRB" = "SC=RRB", "SC_over_RRB" = "SC>RRB", "TD" = "TD")) +
    scale_colour_manual(values = c(colors2use[2:3], "grey60")) + guides(colour = "none") +
    theme(text = element_text(size = fontSize - 5),
          axis.text.x = element_text(size = fontSize - 5),
          axis.text.y = element_text(size = fontSize - 5))
  print(p)
}
# Store the SC_equal_RRB vs SC_over_RRB effect sizes under row "0.9"
# (presumably the z threshold used for this run; confirm against earlier code)
# Vineland ABC standard score
vabc["0.9", "Discovery"] <- output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc["0.9", "Replication"] <- output_res["vabsabcabc_standard", "SCequalRRB_vs_SCoverRRB_Rep.es"]
# Vineland domain score vabsdscoresd_dss
vabc_dls["0.9", "Discovery"] <- output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc_dls["0.9", "Replication"] <- output_res["vabsdscoresd_dss", "SCequalRRB_vs_SCoverRRB_Rep.es"]
# Print the full results table
output_res
## All_Disc.fstat All_Disc.pval All_Rep.fstat All_Rep.pval
## age 0.2855127 8.358420e-01 0.90805525 4.377311e-01
## meanFD 2.3602889 7.205717e-02 0.08650023 9.673869e-01
## viq_all 2.4328749 6.562775e-02 3.38300738 1.888191e-02
## piq_all 1.5615896 1.993320e-01 1.20983913 3.067449e-01
## fsiq4_all 2.3638575 7.174689e-02 2.06813815 1.050238e-01
## A_pct_severity 26.9658312 1.735245e-10 55.13216638 0.000000e+00
## B_pct_severity 29.7710577 2.495759e-11 2.34544394 9.994192e-02
## ADI_social_total 1.4890676 2.294944e-01 8.91634507 2.379619e-04
## ADI_communication_total 1.5868589 2.085961e-01 9.48327261 1.450273e-04
## ADI_RRB_total 26.0311927 3.355514e-10 0.95382609 3.880064e-01
## ados_2_SA_CSS 3.6051473 3.007004e-02 1.35353157 2.621776e-01
## ados_2_RRB_CSS 1.0836446 3.415424e-01 0.86272207 4.245701e-01
## SRS_tscore_self 36.4014535 0.000000e+00 48.01444961 6.661338e-16
## RBS_total 20.8275522 1.532618e-11 17.79086787 4.331766e-10
## SSP_total 31.8680712 1.665335e-15 24.34024007 1.511902e-12
## vabsdscoress_dss 23.1160444 2.343903e-12 24.54698034 5.052625e-13
## vabsdscoresd_dss 12.2992810 3.105951e-07 10.60071813 2.272064e-06
## vabsdscoresc_dss 9.8474547 5.733993e-06 10.38141779 2.937494e-06
## vabsabcabc_standard 18.5322400 2.855782e-10 17.01360570 1.429190e-09
## SCequalRRB_vs_SCoverRRB_Disc.fstat
## age 7.805025e-02
## meanFD 8.979743e-01
## viq_all 5.476685e-02
## piq_all 5.165830e-03
## fsiq4_all 1.504420e-02
## A_pct_severity 3.501725e+01
## B_pct_severity 3.781501e+01
## ADI_social_total 2.155285e+00
## ADI_communication_total 2.939368e+00
## ADI_RRB_total 2.847948e+01
## ados_2_SA_CSS 5.068942e-04
## ados_2_RRB_CSS 3.652838e-01
## SRS_tscore_self 6.311839e-02
## RBS_total 3.108287e+00
## SSP_total 2.274808e+00
## vabsdscoress_dss 2.312769e+00
## vabsdscoresd_dss 2.297887e+00
## vabsdscoresc_dss 6.674736e-01
## vabsabcabc_standard 3.406295e+00
## SCequalRRB_vs_SCoverRRB_Disc.tstat
## age 0.27937474
## meanFD -0.94761507
## viq_all 0.23402318
## piq_all -0.07187371
## fsiq4_all 0.12265479
## A_pct_severity 5.91753787
## B_pct_severity -6.14939138
## ADI_social_total 1.46808886
## ADI_communication_total 1.71445846
## ADI_RRB_total -5.33661688
## ados_2_SA_CSS 0.02251431
## ados_2_RRB_CSS -0.60438710
## SRS_tscore_self 0.25123374
## RBS_total -1.76303352
## SSP_total 1.50824657
## vabsdscoress_dss -1.52077905
## vabsdscoresd_dss -1.51587821
## vabsdscoresc_dss -0.81699058
## vabsabcabc_standard -1.84561518
## SCequalRRB_vs_SCoverRRB_Disc.pval
## age 7.804382e-01
## meanFD 3.452298e-01
## viq_all 8.153725e-01
## piq_all 9.428241e-01
## fsiq4_all 9.025856e-01
## A_pct_severity 3.170386e-08
## B_pct_severity 1.054411e-08
## ADI_social_total 1.446964e-01
## ADI_communication_total 8.902613e-02
## ADI_RRB_total 4.539420e-07
## ados_2_SA_CSS 9.820760e-01
## ados_2_RRB_CSS 5.467565e-01
## SRS_tscore_self 8.026429e-01
## RBS_total 8.091942e-02
## SSP_total 1.359286e-01
## vabsdscoress_dss 1.311086e-01
## vabsdscoresd_dss 1.323675e-01
## vabsdscoresc_dss 4.156530e-01
## vabsabcabc_standard 6.758935e-02
## SCequalRRB_vs_SCoverRRB_Disc.es
## age -0.0491267306
## meanFD 0.1707483830
## viq_all -0.0618981388
## piq_all 0.0175129387
## fsiq4_all -0.0221008674
## A_pct_severity -0.8963307629
## B_pct_severity 1.0979606772
## ADI_social_total -0.1601300150
## ADI_communication_total -0.1549178638
## ADI_RRB_total 0.9864464663
## ados_2_SA_CSS 0.0009583416
## ados_2_RRB_CSS 0.1270034368
## SRS_tscore_self 0.1041855065
## RBS_total 0.3735177224
## SSP_total -0.2913929275
## vabsdscoress_dss 0.2078098782
## vabsdscoresd_dss 0.2012499007
## vabsdscoresc_dss 0.1468494088
## vabsabcabc_standard 0.2736410750
## SCequalRRB_vs_SCoverRRB_Rep.fstat
## age 0.33755002
## meanFD 0.18055094
## viq_all 5.18677068
## piq_all 2.59636798
## fsiq4_all 3.33876047
## A_pct_severity 105.88313587
## B_pct_severity 2.57049807
## ADI_social_total 17.31772979
## ADI_communication_total 16.08830244
## ADI_RRB_total 0.01960920
## ados_2_SA_CSS 2.67460927
## ados_2_RRB_CSS 1.75112047
## SRS_tscore_self 0.03853939
## RBS_total 14.57591064
## SSP_total 2.48866761
## vabsdscoress_dss 19.68832383
## vabsdscoresd_dss 11.51435205
## vabsdscoresc_dss 18.65262122
## vabsabcabc_standard 22.61237695
## SCequalRRB_vs_SCoverRRB_Rep.tstat
## age -0.5809906
## meanFD -0.4249129
## viq_all -2.2774483
## piq_all -1.6113249
## fsiq4_all -1.8272275
## A_pct_severity 10.2899532
## B_pct_severity -1.6032773
## ADI_social_total 4.1614577
## ADI_communication_total 4.0110226
## ADI_RRB_total 0.1400329
## ados_2_SA_CSS 1.6354233
## ados_2_RRB_CSS -1.3232991
## SRS_tscore_self 0.1963145
## RBS_total 3.8178411
## SSP_total -1.5775511
## vabsdscoress_dss -4.4371527
## vabsdscoresd_dss -3.3932804
## vabsdscoresc_dss -4.3188680
## vabsabcabc_standard -4.7552473
## SCequalRRB_vs_SCoverRRB_Rep.pval
## age 5.622846e-01
## meanFD 6.716249e-01
## viq_all 2.450063e-02
## piq_all 1.096515e-01
## fsiq4_all 7.008981e-02
## A_pct_severity 0.000000e+00
## B_pct_severity 1.113773e-01
## ADI_social_total 5.811897e-05
## ADI_communication_total 1.029590e-04
## ADI_RRB_total 8.888576e-01
## ados_2_SA_CSS 1.045591e-01
## ados_2_RRB_CSS 1.882310e-01
## SRS_tscore_self 8.450502e-01
## RBS_total 2.249553e-04
## SSP_total 1.188223e-01
## vabsdscoress_dss 2.114563e-05
## vabsdscoresd_dss 9.529960e-04
## vabsdscoresc_dss 3.363672e-05
## vabsabcabc_standard 5.891804e-06
## SCequalRRB_vs_SCoverRRB_Rep.es
## age 0.103423565
## meanFD 0.075639786
## viq_all 0.449543002
## piq_all 0.364797390
## fsiq4_all 0.393005034
## A_pct_severity -1.743934905
## B_pct_severity 0.299146327
## ADI_social_total -0.688004039
## ADI_communication_total -0.659044077
## ADI_RRB_total -0.003443901
## ados_2_SA_CSS -0.260690681
## ados_2_RRB_CSS 0.154522997
## SRS_tscore_self -0.050543903
## RBS_total -0.669750517
## SSP_total 0.345836573
## vabsdscoress_dss 0.802957555
## vabsdscoresd_dss 0.631392506
## vabsdscoresc_dss 0.806944461
## vabsabcabc_standard 0.862310814
## SCequalRRB_vs_SCoverRRB.repBF varNames
## age 7.313631e-01 age
## meanFD 1.100533e+00 meanFD
## viq_all 7.216283e-01 viq_all
## piq_all 7.047582e-01 piq_all
## fsiq4_all 7.081294e-01 fsiq4_all
## A_pct_severity 3.968454e+06 A_pct_severity
## B_pct_severity 1.180802e+07 B_pct_severity
## ADI_social_total 2.055481e+00 ADI_social_total
## ADI_communication_total 3.023825e+00 ADI_communication_total
## ADI_RRB_total 2.839841e+05 ADI_RRB_total
## ados_2_SA_CSS 7.026229e-01 ados_2_SA_CSS
## ados_2_RRB_CSS 8.431448e-01 ados_2_RRB_CSS
## SRS_tscore_self 7.252645e-01 SRS_tscore_self
## RBS_total 3.279224e+00 RBS_total
## SSP_total 2.182111e+00 SSP_total
## vabsdscoress_dss 2.216565e+00 vabsdscoress_dss
## vabsdscoresd_dss 2.203469e+00 vabsdscoresd_dss
## vabsdscoresc_dss 9.809837e-01 vabsdscoresc_dss
## vabsabcabc_standard 3.795925e+00 vabsabcabc_standard
#------------------------------------------------------------------------------
# Subtype both NDAR sets at a z-score threshold of 1 on the difference score
# (vars2use[1] minus vars2use[2]). Each set is normalised with its own mean
# and standard deviation.
z_thresh <- 1
# vars2use = c("dbaes_atotal","dbaes_btotal")

# Discovery: difference score, its mean/sd, then subtype labels
ds_disc <- Dverbal_Discovery[[vars2use[1]]] - Dverbal_Discovery[[vars2use[2]]]
mean2use <- mean(ds_disc)
sd2use <- sd(ds_disc)
Dverbal_Discovery <- make_subtype(
  data2use = Dverbal_Discovery,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)

# Replication: difference score, its mean/sd, then subtype labels
ds_rep <- Dverbal_Replication[[vars2use[1]]] - Dverbal_Replication[[vars2use[2]]]
mean2use <- mean(ds_rep)
sd2use <- sd(ds_rep)
Dverbal_Replication <- make_subtype(
  data2use = Dverbal_Replication,
  z_thresh = z_thresh,
  mean2use = mean2use,
  sd2use = sd2use
)
#------------------------------------------------------------------------------
# Subtype-by-sex contingency table, NDAR Discovery
table(
  Dverbal_Discovery$z_ds_group,
  Dverbal_Discovery$sex
)
##
## F M
## RRB_over_SC 26 115
## SC_equal_RRB 143 468
## SC_over_RRB 28 109
# Pearson chi-squared test of independence between subtype and sex
cs_res <- chisq.test(x = Dverbal_Discovery$z_ds_group, y = Dverbal_Discovery$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Discovery$z_ds_group and Dverbal_Discovery$sex
## X-squared = 1.9153, df = 2, p-value = 0.3838
# Subtype-by-sex contingency table, NDAR Replication
table(
  Dverbal_Replication$z_ds_group,
  Dverbal_Replication$sex
)
##
## F M
## RRB_over_SC 22 115
## SC_equal_RRB 147 482
## SC_over_RRB 27 97
# Pearson chi-squared test of independence between subtype and sex
cs_res <- chisq.test(x = Dverbal_Replication$z_ds_group, y = Dverbal_Replication$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: Dverbal_Replication$z_ds_group and Dverbal_Replication$sex
## X-squared = 3.5077, df = 2, p-value = 0.1731
#------------------------------------------------------------------------------
# Per-subtype descriptive statistics, NDAR Discovery
# Keep identifiers plus the variables to be summarised
df2use <- Dverbal_Discovery[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# interview_age divided by 12 (presumably months to years)
df2use$age <- df2use$interview_age / 12
cols2use <- c(
  "z_ds_group", "sex", "age", "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 141 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 141 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 141 10.05 4.02 9.33 9.76 3.34 3.00 22.58 19.58
## ados_age 4 19 86.95 34.46 84.00 84.94 26.69 37.00 171.00 134.00
## ados_sa_css 5 19 6.11 2.49 7.00 6.12 2.97 2.00 10.00 8.00
## ados_rrb_css 6 19 7.37 2.29 8.00 7.59 1.48 1.00 10.00 9.00
## iq 7 39 102.69 14.59 102.00 103.03 16.31 67.00 139.00 72.00
## dbaes_atotal 8 141 0.21 0.10 0.20 0.20 0.11 0.01 0.45 0.44
## dbaes_btotal 9 141 0.49 0.12 0.49 0.49 0.13 0.26 0.79 0.53
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 0.70 0.14 0.34
## ados_age 0.69 -0.02 7.91
## ados_sa_css -0.19 -1.35 0.57
## ados_rrb_css -1.14 0.78 0.53
## iq -0.20 0.18 2.34
## dbaes_atotal 0.15 -0.79 0.01
## dbaes_btotal 0.22 -0.72 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 611 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 611 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 611 9.03 5.38 8.08 8.29 4.69 0 45.75 45.75 1.96
## ados_age 4 99 84.88 45.80 73.00 80.72 50.41 27 202.00 175.00 0.64
## ados_sa_css 5 99 6.92 2.01 7.00 7.01 1.48 1 10.00 9.00 -0.37
## ados_rrb_css 6 99 7.68 2.30 8.00 8.06 1.48 1 10.00 9.00 -1.57
## iq 7 142 103.89 18.97 107.00 105.53 17.79 40 138.00 98.00 -0.95
## dbaes_atotal 8 611 0.31 0.14 0.31 0.31 0.14 0 0.70 0.70 0.01
## dbaes_btotal 9 611 0.32 0.14 0.32 0.32 0.14 0 0.68 0.68 -0.05
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 6.69 0.22
## ados_age -0.85 4.60
## ados_sa_css -0.18 0.20
## ados_rrb_css 2.33 0.23
## iq 1.28 1.59
## dbaes_atotal -0.32 0.01
## dbaes_btotal -0.33 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 137 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 137 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 137 7.24 5.50 5.50 6.19 2.84 1.67 37.33 35.67
## ados_age 4 35 61.69 26.99 62.00 58.38 23.72 30.00 172.00 142.00
## ados_sa_css 5 35 7.46 1.60 7.00 7.48 1.48 4.00 10.00 6.00
## ados_rrb_css 6 35 8.37 1.31 8.00 8.41 1.48 5.00 10.00 5.00
## iq 7 18 106.83 17.10 111.50 106.50 17.79 79.00 140.00 61.00
## dbaes_atotal 8 137 0.51 0.13 0.50 0.51 0.13 0.20 0.87 0.67
## dbaes_btotal 9 137 0.21 0.10 0.21 0.21 0.09 0.00 0.47 0.47
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.50 7.77 0.47
## ados_age 1.89 5.55 4.56
## ados_sa_css -0.11 -0.88 0.27
## ados_rrb_css -0.30 -0.76 0.22
## iq 0.05 -1.12 4.03
## dbaes_atotal 0.27 -0.24 0.01
## dbaes_btotal 0.15 -0.12 0.01
# Per-subtype descriptive statistics, NDAR Replication
# Keep identifiers plus the variables to be summarised
df2use <- Dverbal_Replication[, c(
  "subjectkey", "dataset_id", "collection_id", "interview_age", "sex",
  "z_ds_group", "ados_age", "ados_sa_css", "ados_rrb_css", "iq",
  "dbaes_atotal", "dbaes_btotal"
)]
# interview_age divided by 12 (presumably months to years)
df2use$age <- df2use$interview_age / 12
cols2use <- c(
  "z_ds_group", "sex", "age", "ados_age", "ados_sa_css", "ados_rrb_css",
  "iq", "dbaes_atotal", "dbaes_btotal"
)
res <- describeBy(x = df2use[, cols2use], group = "z_ds_group")
res
##
## Descriptive statistics by group
## group: RRB_over_SC
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 137 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00
## sex* 2 137 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 137 9.76 4.35 9.42 9.39 3.83 3.17 28.58 25.42
## ados_age 4 8 112.00 52.41 100.00 112.00 58.56 39.00 189.00 150.00
## ados_sa_css 5 8 6.62 2.39 7.00 6.62 2.22 3.00 9.00 6.00
## ados_rrb_css 6 8 6.62 1.06 7.00 6.62 0.00 5.00 8.00 3.00
## iq 7 40 106.92 15.88 105.50 105.66 14.08 70.00 152.00 82.00
## dbaes_atotal 8 137 0.21 0.10 0.21 0.21 0.09 0.01 0.51 0.50
## dbaes_btotal 9 137 0.52 0.12 0.51 0.51 0.11 0.24 0.93 0.69
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 1.19 2.73 0.37
## ados_age 0.13 -1.71 18.53
## ados_sa_css -0.60 -1.41 0.84
## ados_rrb_css -0.60 -1.26 0.37
## iq 0.57 0.61 2.51
## dbaes_atotal 0.60 0.57 0.01
## dbaes_btotal 0.67 0.97 0.01
## ------------------------------------------------------------
## group: SC_equal_RRB
## vars n mean sd median trimmed mad min max range skew
## z_ds_group* 1 629 2.00 0.00 2.00 2.00 0.00 2 2.00 0.00 NaN
## sex* 2 629 NaN NA NA NaN NA Inf -Inf -Inf NA
## age 3 629 9.10 5.54 8.00 8.27 4.57 0 40.92 40.92 1.74
## ados_age 4 119 79.42 37.97 71.00 75.51 41.51 35 196.00 161.00 0.83
## ados_sa_css 5 119 6.93 2.04 7.00 6.98 1.48 2 10.00 8.00 -0.06
## ados_rrb_css 6 119 7.40 2.30 8.00 7.71 1.48 1 10.00 9.00 -1.31
## iq 7 135 105.44 17.96 108.00 105.90 16.31 57 146.00 89.00 -0.30
## dbaes_atotal 8 629 0.31 0.14 0.31 0.31 0.14 0 0.78 0.78 0.04
## dbaes_btotal 9 629 0.33 0.14 0.33 0.33 0.14 0 0.81 0.81 0.12
## kurtosis se
## z_ds_group* NaN 0.00
## sex* NA NA
## age 4.28 0.22
## ados_age 0.00 3.48
## ados_sa_css -0.83 0.19
## ados_rrb_css 1.50 0.21
## iq 0.23 1.55
## dbaes_atotal -0.20 0.01
## dbaes_btotal 0.18 0.01
## ------------------------------------------------------------
## group: SC_over_RRB
## vars n mean sd median trimmed mad min max range
## z_ds_group* 1 124 3.00 0.00 3.00 3.00 0.00 3.00 3.00 0.00
## sex* 2 124 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 124 6.92 5.14 5.25 5.93 2.72 2.08 28.50 26.42
## ados_age 4 26 66.15 24.50 59.50 63.82 22.98 30.00 141.00 111.00
## ados_sa_css 5 26 6.96 1.73 6.50 6.95 2.22 3.00 10.00 7.00
## ados_rrb_css 6 26 7.42 2.55 8.00 7.77 2.97 1.00 10.00 9.00
## iq 7 11 117.27 13.76 119.00 118.78 13.34 87.00 134.00 47.00
## dbaes_atotal 8 124 0.53 0.13 0.52 0.52 0.13 0.25 0.96 0.71
## dbaes_btotal 9 124 0.21 0.11 0.20 0.21 0.11 0.00 0.50 0.50
## skew kurtosis se
## z_ds_group* NaN NaN 0.00
## sex* NA NA NA
## age 2.35 6.07 0.46
## ados_age 1.06 1.15 4.81
## ados_sa_css 0.01 -0.68 0.34
## ados_rrb_css -1.10 0.50 0.50
## iq -0.81 -0.42 4.15
## dbaes_atotal 0.31 0.20 0.01
## dbaes_btotal 0.22 -0.43 0.01
#------------------------------------------------------------------------------
# One-way ANOVA of interview age on subtype
# Discovery
mod2use <- lm(
  formula = interview_age ~ z_ds_group,
  data = Dverbal_Discovery
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 82914 41457 10.618 2.772e-05 ***
## Residuals 886 3459152 3904
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: one-way ANOVA of interview age on subtype
mod2use <- lm(
  formula = interview_age ~ z_ds_group,
  data = Dverbal_Replication
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: interview_age
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 88049 44025 10.801 2.319e-05 ***
## Residuals 887 3615315 4076
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA of SC (dbaes_atotal) on subtype
# Discovery
mod2use <- lm(
  formula = dbaes_atotal ~ z_ds_group,
  data = Dverbal_Discovery
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 7.1565 3.5782 212.57 < 2.2e-16 ***
## Residuals 886 14.9143 0.0168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: one-way ANOVA of SC (dbaes_atotal) on subtype
mod2use <- lm(
  formula = dbaes_atotal ~ z_ds_group,
  data = Dverbal_Replication
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_atotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.7942 3.3971 191.67 < 2.2e-16 ***
## Residuals 887 15.7212 0.0177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA of RRB (dbaes_btotal) on subtype
# Discovery
mod2use <- lm(
  formula = dbaes_btotal ~ z_ds_group,
  data = Dverbal_Discovery
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 5.7364 2.86820 174.19 < 2.2e-16 ***
## Residuals 886 14.5884 0.01647
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: one-way ANOVA of RRB (dbaes_btotal) on subtype
mod2use <- lm(
  formula = dbaes_btotal ~ z_ds_group,
  data = Dverbal_Replication
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: dbaes_btotal
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 6.5659 3.2830 184.74 < 2.2e-16 ***
## Residuals 887 15.7627 0.0178
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# One-way ANOVA of ADOS SA CSS (ados_sa_css) on subtype
# Discovery
mod2use <- lm(
  formula = ados_sa_css ~ z_ds_group,
  data = Dverbal_Discovery
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 22.64 11.3209 2.85 0.06098 .
## Residuals 150 595.83 3.9722
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replication: one-way ANOVA of ADOS SA CSS (ados_sa_css) on subtype
mod2use <- lm(
  formula = ados_sa_css ~ z_ds_group,
  data = Dverbal_Replication
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_sa_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 0.76 0.3801 0.094 0.9103
## Residuals 150 606.30 4.0420
#------------------------------------------------------------------------------
# Tests of differences in ADOS RRB CSS across the subtypes
# Discovery: ANOVA table for ados_rrb_css modelled by subtype
mod2use <- lm(
  formula = ados_rrb_css ~ z_ds_group,
  data = Dverbal_Discovery
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 16.47 8.2350 1.8375 0.1628
## Residuals 150 672.25 4.4817
# Replication: ANOVA table for ados_rrb_css modelled by subtype
mod2use <- lm(
  formula = ados_rrb_css ~ z_ds_group,
  data = Dverbal_Replication
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: ados_rrb_css
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 4.64 2.3217 0.4392 0.6453
## Residuals 150 792.86 5.2857
#------------------------------------------------------------------------------
# Tests of differences in IQ across the subtypes
# Discovery: ANOVA table for iq modelled by subtype
mod2use <- lm(
  formula = iq ~ z_ds_group,
  data = Dverbal_Discovery
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 212 105.85 0.3251 0.7228
## Residuals 196 63817 325.60
# Replication: ANOVA table for iq modelled by subtype
mod2use <- lm(
  formula = iq ~ z_ds_group,
  data = Dverbal_Replication
)
anova(mod2use)
## Analysis of Variance Table
##
## Response: iq
## Df Sum Sq Mean Sq F value Pr(>F)
## z_ds_group 2 1434 716.97 2.3868 0.09478 .
## Residuals 183 54972 300.40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#------------------------------------------------------------------------------
# Make scatterplots with difference score Z subtypes in different colors
maxScores <- c(3,4)
# Discovery: SC (x) against RRB (y), one point per row, coloured by subtype
p_disc <- ggplot(data = Dverbal_Discovery,
                 aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)) +
  geom_point() +
  xlab("SC") + ylab("RRB") +
  ylim(0,1) + xlim(0,1) +
  ggtitle("NDAR Discovery")
# Legend-free copy used for the saved panel. guides(colour = FALSE) is
# deprecated in ggplot2; "none" is the supported way to drop a legend.
p1_top_left <- p_disc + guides(colour = "none")
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Disc_scatterplot_z%s.pdf",as.character(z_thresh))),
       plot = p1_top_left)
# show the plot with its legend, then the number of rows per subtype
p_disc
table(Dverbal_Discovery$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 141 611 137
# Correlation test between SC and RRB in the Discovery set.
# The argument expressions are kept as written because cor.test() echoes
# them in the "data:" line of its printout.
cor_res <- cor.test(
  x = Dverbal_Discovery$dbaes_atotal,
  y = Dverbal_Discovery$dbaes_btotal
)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Discovery$dbaes_atotal and Dverbal_Discovery$dbaes_btotal
## t = 6.6829, df = 887, p-value = 4.134e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1554332 0.2806578
## sample estimates:
## cor
## 0.2189469
# Replication: SC (x) against RRB (y), one point per row, coloured by subtype
p_rep <- ggplot(data = Dverbal_Replication,
                aes(x = dbaes_atotal, y = dbaes_btotal, colour = z_ds_group)) +
  geom_point() +
  xlab("SC") + ylab("RRB") +
  ylim(0,1) + xlim(0,1) +
  ggtitle("NDAR Replication")
# Legend-free copy used for the saved panel. guides(colour = FALSE) is
# deprecated in ggplot2; "none" is the supported way to drop a legend.
p2_bottom_left <- p_rep + guides(colour = "none")
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Rep_scatterplot_z%s.pdf",as.character(z_thresh))),
       plot = p2_bottom_left)
# show the plot with its legend, then the number of rows per subtype
p_rep
table(Dverbal_Replication$z_ds_group)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB
## 137 629 124
# Correlation test between SC and RRB in the Replication set.
# The argument expressions are kept as written because cor.test() echoes
# them in the "data:" line of its printout.
cor_res <- cor.test(
  x = Dverbal_Replication$dbaes_atotal,
  y = Dverbal_Replication$dbaes_btotal
)
cor_res
##
## Pearson's product-moment correlation
##
## data: Dverbal_Replication$dbaes_atotal and Dverbal_Replication$dbaes_btotal
## t = 7.1459, df = 888, p-value = 1.864e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1700824 0.2943934
## sample estimates:
## cor
## 0.2331903
#------------------------------------------------------------------------------
# Run supervised model with Discovery as training and Replication as Test
# run validation
train_data <- Dverbal_Discovery
test_data <- Dverbal_Replication

# mean and sd of the SC - RRB difference score in the training set
# (computed once; the original recomputed the same two values further down)
train_mean <- mean(train_data[,vars2use[1]] - train_data[,vars2use[2]])
train_sd <- sd(train_data[,vars2use[1]] - train_data[,vars2use[2]])

# training-set subtypes, z-scored against the training mean and sd
tmp_train <- make_subtype(data2use = train_data,
                          z_thresh = z_thresh,
                          mean2use = train_mean,
                          sd2use = train_sd)

# reference ("actual") test labels: test set z-scored against its OWN mean and sd
mean2use <- mean(test_data[,vars2use[1]] - test_data[,vars2use[2]])
sd2use <- sd(test_data[,vars2use[1]] - test_data[,vars2use[2]])
tmp_test <- make_subtype(data2use = test_data,
                         z_thresh = z_thresh,
                         mean2use = mean2use,
                         sd2use = sd2use)

# "predicted" test labels: test set z-scored against the TRAINING mean and sd
# (mean2use/sd2use are left holding the training values, as before)
mean2use <- train_mean
sd2use <- train_sd
pred_labels <- make_subtype(data2use = test_data,
                            z_thresh = z_thresh,
                            mean2use = train_mean,
                            sd2use = train_sd)

# Confusion matrix: rows = actual (test norms), columns = predicted (train norms).
# make_subtype() builds its factor from the labels that occur, so a subtype
# with no members would silently shrink the table and break [3,3] indexing;
# fixing the levels keeps the table 3 x 3 in the order the plot labels assume.
subtype_levels <- c("RRB_over_SC","SC_equal_RRB","SC_over_RRB")
confmat <- table(factor(tmp_test$z_ds_group, levels = subtype_levels),
                 factor(pred_labels$z_ds_group, levels = subtype_levels))
# accuracy = share of test rows on the diagonal
acc <- sum(diag(confmat))/nrow(pred_labels)
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#==============================================================================
#==============================================================================
# plot confusion matrix
# The hook pushes a viewport (90% width/height, anchored top-right) on every
# grid.newpage() so the heatmap leaves a margin on the left and bottom for the
# axis titles drawn below; the hook is removed again right after plotting.
setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
# Cell colour = percentage of each row (actual class); cell text = raw counts.
# pheatmap expects `breaks` to be one element longer than `color`, hence 101
# break points for the 100 colours.
pheatmap(confmat/rowSums(confmat)*100,
         display_numbers = confmat,
         color = colorRampPalette(c('white','red'))(100),
         cluster_rows = FALSE, cluster_cols = FALSE,
         fontsize_number = fontSize, fontsize_row = fontSize, fontsize_col = fontSize,
         labels_row = c("RRB>SC","SC=RRB","SC>RRB"),
         labels_col = c("RRB>SC","SC=RRB","SC>RRB"),
         angle_col=90,
         breaks = seq(0, 100, length.out = 101))
setHook("grid.newpage", NULL, "replace")
# confmat was built as table(actual, predicted): columns (titled along the
# bottom) are the predicted labels and rows (titled along the left) are the
# actual labels. The two titles were previously the other way round.
grid::grid.text("Predicted Labels", y=-0.07, gp=gpar(fontsize=fontSize))
grid::grid.text("Actual Labels", x=-0.07, rot=90, gp=gpar(fontsize=fontSize))
# show accuracy
true_accuracy <- acc
true_accuracy
## [1] 0.9910112
#================================================================================
#================================================================================
# #------------------------------------------------------------------------------
# # Permute subtype labels to examine how well supervised model performs
#
# # nperm = 10000
#
# # make subtypes using z-scores computed from the mean and sd of the training set
# train_data = Dverbal_Discovery
# test_data = Dverbal_Replication
# mean2use = mean(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# sd2use = sd(train_data[,vars2use[1]] - train_data[,vars2use[2]])
# tmp_train = make_subtype(data2use = train_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# mean2use = mean(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# sd2use = sd(test_data[,vars2use[1]] - test_data[,vars2use[2]])
# tmp_test = make_subtype(data2use = test_data,
# z_thresh = z_thresh,
# mean2use = mean2use,
# sd2use = sd2use)
#
# acc = vector(length = nperm)
# for (iperm in 1:nperm){
# # set seed for reproducibility
# set.seed(iperm)
#
# sc_perm = sample(train_data[,vars2use[1]])
# rrb_perm = sample(train_data[,vars2use[2]])
# perm_mean2use = mean(sc_perm - rrb_perm)
# perm_sd2use = sd(sc_perm - rrb_perm)
# # perm_mean2use = mean(train_data[,vars2use[1]] - rrb_perm)
# # perm_sd2use = sd(train_data[,vars2use[1]] - rrb_perm)
# pred_labels = make_subtype(data2use = tmp_test,
# z_thresh = z_thresh,
# mean2use = perm_mean2use,
# sd2use = perm_sd2use)
# confmat = table(tmp_test$z_ds_group,pred_labels$z_ds_group)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/dim(pred_labels)[1]
#
# # compute model
# permuted_labels = sample(tmp_train$z_ds_group)
# mod2use = svm(x = tmp_train[,vars2use], y = permuted_labels)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc[iperm] = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
# #============================================================================
# } # for (iperm in 1:nperm)
#
# df2plot = data.frame(Accuracy = acc)
# p = ggplot(data = df2plot, aes(x = Accuracy)) + geom_histogram() + geom_vline(xintercept=true_accuracy)
# p
#
# # compute p-value
# pval = sum(c(true_accuracy,acc)>=true_accuracy)/(nperm+1)
# pval
#================================================================================
#================================================================================
#------------------------------------------------------------------------------
# Plot difference score Z subtypes in Discovery set
maxScores <- c(3,4)
# Discovery - make plot with all individuals shown as lines
df2use <- Dverbal_Discovery[,c("subjectkey",adi_total_vars2use)]
# subtype labels come from tmp_train, which was built from the same rows
df2use$subgrp <- factor(tmp_train$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per participant x subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey","subgrp"),
                measure.vars = c("SC","RRB"))
# one facet per subtype; each participant is a line joining its SC and RRB value
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) +
  facet_grid(. ~ subgrp) +
  geom_point(shape=1) +
  geom_line(alpha = 0.2) +
  ylim(0,1) +
  ylab("Percent Severity") + xlab("ADI-R subscale") +
  # guides(colour = FALSE) is deprecated in ggplot2; "none" drops the legend
  guides(colour = "none")
# the legend is already removed on p, so the saved panel is the same plot
p3_middle_top <- p
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Disc_jitterplot_z%s.pdf",as.character(z_thresh))), plot = p3_middle_top)
p
# Plot difference score Z subtypes in Replication set
maxScores <- c(3,4)
# Replication - make plot with all individuals shown as lines
df2use <- Dverbal_Replication[,c("subjectkey",adi_total_vars2use)]
# subtype labels come from tmp_test, which was built from the same rows
df2use$subgrp <- factor(tmp_test$z_ds_group)
df2use <- data.frame(df2use)
df2use$subjectkey <- factor(df2use$subjectkey)
df2use$SC <- df2use$dbaes_atotal
df2use$RRB <- df2use$dbaes_btotal
# long format: one row per participant x subscale
df4plot <- melt(df2use,
                id.vars = c("subjectkey","subgrp"),
                measure.vars = c("SC","RRB"))
# one facet per subtype; each participant is a line joining its SC and RRB value
p <- ggplot(data = df4plot, aes(x = variable,
                                y = value,
                                colour = subgrp,
                                group = subjectkey)) +
  facet_grid(. ~ subgrp) +
  geom_point(shape=1) +
  geom_line(alpha = 0.2) +
  ylim(0,1) +
  ylab("Percent Severity") + xlab("ADI-R subscale") +
  # guides(colour = FALSE) is deprecated in ggplot2; "none" drops the legend
  guides(colour = "none")
# the legend is already removed on p, so the saved panel is the same plot
p4_middle_bottom <- p
ggsave(filename = file.path(plotpath, sprintf("final_NDAR_Rep_jitterplot_z%s.pdf",as.character(z_thresh))), plot = p4_middle_bottom)
p
#------------------------------------------------------------------------------
# Apply NDAR subtypes to EU-AIMS LEAP data
Dverbal <- read.csv(file.path(datapath,"tidy_verbal.csv"))
euaims_data <- read.csv(file.path(datapath,"tidy_euaims.csv"))

# EU-AIMS domain scores used to build SC (A1-A3) and RRB (B1-B4)
sc_items <- c("A1_pct_severity","A2_pct_severity","A3_pct_severity")
rrb_items <- c("B1_pct_severity","B2_pct_severity","B3_pct_severity","B4_pct_severity")

# keep ASD rows that have all seven domain scores
mask1 <- euaims_data$Diagnosis=="ASD"
mask2 <- !complete.cases(euaims_data[, c(sc_items, rrb_items)])  # TRUE = any score missing
euaims_data <- subset(euaims_data, (mask1 & !mask2))

# The NDAR columns are used exactly as read: the two statements that stood
# here assigned each column to itself and have been removed.
# EU-AIMS SC and RRB are the plain means of their *_pct_severity items,
# stored under the same column names as the NDAR data.
euaims_data[,vars2use[1]] <- (euaims_data$A1_pct_severity +
                                euaims_data$A2_pct_severity +
                                euaims_data$A3_pct_severity)/3
euaims_data[,vars2use[2]] <- (euaims_data$B1_pct_severity +
                                euaims_data$B2_pct_severity +
                                euaims_data$B3_pct_severity +
                                euaims_data$B4_pct_severity)/4

# norms (mean and sd of the SC - RRB difference) come from the full NDAR set
train_data <- Dverbal
test_data <- euaims_data
mean2use <- mean(train_data[,vars2use[1]] - train_data[,vars2use[2]], na.rm=TRUE)
sd2use <- sd(train_data[,vars2use[1]] - train_data[,vars2use[2]], na.rm=TRUE)
c(mean2use, sd2use)
## [1] -0.01045243 0.19482749
# Label both samples with the same mean2use/sd2use (set just above from
# train_data), so the EU-AIMS subtypes are defined on the NDAR scale.
tmp_train <- make_subtype(train_data, z_thresh, mean2use = mean2use, sd2use = sd2use)
tmp_test <- make_subtype(test_data, z_thresh, mean2use = mean2use, sd2use = sd2use)
#===========================================================================
#===========================================================================
# # compute model
# mod2use = svm(x = tmp_train[,vars2use], y = tmp_train$z_ds_group)
# pred_labels = predict(mod2use, tmp_test[,vars2use])
# confmat = table(tmp_test$z_ds_group,pred_labels)
# acc = (confmat[1,1]+confmat[2,2]+confmat[3,3])/length(pred_labels)
#
# tmp_test$svm_pred_labels = pred_labels
#
# # plot confusion matrix
# setHook("grid.newpage", function() pushViewport(viewport(x=1,y=1,width=0.9, height=0.9, name="vp", just=c("right","top"))), action="prepend")
# pheatmap(confmat/rowSums(confmat)*100, display_numbers = confmat, color = colorRampPalette(c('white','red'))(100), cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = fontSize, fontsize_row = fontSize, fontsize_col = fontSize,labels_row = c("RRB>SC","SC=RRB","SC>RRB"),labels_col = c("RRB>SC","SC=RRB","SC>RRB"),angle_col=90,
# breaks= seq(0,100, length=100))
# setHook("grid.newpage", NULL, "replace")
# grid::grid.text("Actual Labels", y=-0.07, gp=gpar(fontsize=fontSize))
# grid::grid.text("Predicted Labels", x=-0.07, rot=90, gp=gpar(fontsize=fontSize))
#===========================================================================
#===========================================================================
# scatterplot
# NDAR (all): SC against RRB coloured by subtype
p1 <- ggplot(data = tmp_train,
             aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))) +
  geom_point() +
  xlab("Social-Communication") + ylab("Restricted Repetitive Behaviors") +
  ylim(0,1) + xlim(0,1) +
  ggtitle("NDAR ALL")
p1
# EU-AIMS: same axes, subtypes assigned with the NDAR mean and sd
p2 <- ggplot(data = tmp_test,
             aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(z_ds_group))) +
  geom_point() +
  xlab("SC") + ylab("RRB") +
  ylim(0,1) + xlim(0,1) +
  ggtitle("EU-AIMS with Groups from NDAR ALL") +
  theme(text = element_text(size=fontSize),
        axis.text.x = element_text(size=fontSize),
        axis.text.y = element_text(size=fontSize))
# Legend-free copy used for the saved panel. guides(colour = FALSE) is
# deprecated in ggplot2; "none" is the supported way to drop a legend.
p5_bottom_right <- p2 + guides(colour = "none")
ggsave(filename = file.path(plotpath, sprintf("final_EUAIMS_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p5_bottom_right)
p2
#===========================================================================
#===========================================================================
# p3 = ggplot(data = tmp_test, aes(x = dbaes_atotal, y = dbaes_btotal, colour = factor(pred_labels))) + geom_point() + xlab("SC") + ylab("RRB") + ylim(0,0.8) + xlim(0,0.8) + ggtitle("EU-AIMS")
# p5_bottom_right = p3 + guides(colour=FALSE)
# ggsave(filename = file.path(plotpath, sprintf("final_EUAIMS_scatterplot_z%s.pdf",as.character(z_thresh))), plot = p5_bottom_right)
# p3
#===========================================================================
#===========================================================================
# write out EU-AIMS LEAP data with subgroups defined by NDAR All
write.csv(
  tmp_test,
  file = file.path(
    datapath,
    sprintf("tidy_euaims_NDAR_subtypes_diffscore_z%s.csv", as.character(z_thresh))
  )
)
#===========================================================================
#===========================================================================
# # Make final plot
# p_final = p1_top_left + p3_middle_top + plot_spacer() + p2_bottom_left + p4_middle_bottom + p5_bottom_right + plot_layout(nrow=3, ncol=3, widths = c(4,4,4), heights = c(8,8,8))
# ggsave(filename = file.path(plotpath, sprintf("final_NDAR_EUAIMS_subtypes_plot_z%s.pdf",as.character(z_thresh))), plot = p_final)
# p_final
#===========================================================================
#===========================================================================
#------------------------------------------------------------------------------
# Integrate subtypes with rest of EU-AIMS LEAP data
euaims_data <- read.csv(file.path(datapath,"tidy_euaims.csv"))

# TD participants: keep columns 2:6 and add placeholder severity/subgroup
# columns so this frame can be row-bound with the ASD frame below.
# NOTE(review): select=2:6 is positional -- presumably subid, age, Centre,
# Schedule, Diagnosis (the first five entries of cols2use); confirm against
# the CSV header.
td_df <- subset(euaims_data, euaims_data$Diagnosis=="TD", select=2:6)
td_df$A_pct_severity <- as.numeric(NA)
td_df$B_pct_severity <- as.numeric(NA)
td_df$subgrp <- "TD"

# ASD participants: take the subtyped EU-AIMS frame and rename its last three
# columns to match td_df (z_ds_group is the label column; the disabled SVM
# variant used svm_pred_labels in its place).
cols2use <- c("subid","age","Centre","Schedule","Diagnosis","dbaes_atotal","dbaes_btotal","z_ds_group")
tmp_asd <- tmp_test[,cols2use]
colnames(tmp_asd)[6:8] <- c("A_pct_severity","B_pct_severity","subgrp")
asd_df <- tmp_asd

# Preprocessing spreadsheet; only rows whose notes column equals "ok" are kept.
# NOTE(review): absolute, user-specific path -- this only runs on a machine
# with this exact Dropbox layout; consider moving the file under datapath.
fname <- "/Users/mlombardo/Dropbox/euaims/data/rsfmri_preproc/euaims_preproc.xlsx"
pp_data <- read_excel(fname)
mask <- pp_data$notes=="ok"
pp_data <- subset(pp_data, mask)

# Attach sex from the spreadsheet; merge() keeps only subids present in both
# inputs, so participants without an "ok" row are dropped here.
asd_df <- merge(pp_data[,c("subid","sex")],asd_df, by = "subid")
td_df <- merge(pp_data[,c("subid","sex")],td_df, by = "subid")
# (an rbind of the un-merged frames used to precede the merges; its result was
# overwritten here without being read, so it has been removed)
all_data <- rbind(td_df,asd_df)
data2write <- merge(pp_data, all_data, by = "subid")
# columns present in both inputs come back with .x/.y suffixes
data2write$age <- data2write$age.x
data2write$sex <- data2write$sex.y
# subgroup counts per Schedule
print(table(data2write$Schedule, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## A 4 47 29 78
## B 0 54 31 83
## C 3 39 21 59
## D 0 20 18 23
# subgroup counts per Centre
print(
  table(data2write[["Centre"]], data2write[["subgrp"]])
)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## CAMBRIDGE 2 33 7 29
## KINGS_COLLEGE 4 59 43 78
## MANNHEIM 0 0 0 34
## NIJMEGEN 1 56 36 64
## UTRECHT 0 12 13 38
# subgroup counts per sex
print(
  table(data2write[["sex"]], data2write[["subgrp"]])
)
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Female 3 41 27 88
## Male 4 119 72 155
# DSM-5 - find best split that balances participants across sites
# Only the single seed 172342 is evaluated here; a wider search range
# (300001:500000) was left disabled in the original call.
a <- findBestSplit(asd_df, seed_range = 172342)
## [1] 172342
# seed(s) with the smallest non-missing discrepancy
min_discrepancy_rows <- which(a$discrepancy == min(a$discrepancy, na.rm = TRUE))
best_seeds <- a$seed[min_discrepancy_rows]
print(best_seeds)
## [1] 172342
# Split datasets -------------------------------------------------------------
rngSeed <- best_seeds[1]

# Every schedule is split separately with the same seed. From the list that
# SplitDatasetsBySex() returns, element 2 is used as Discovery and element 1
# as Replication.

# Schedule A
dset2use <- subset(asd_df, Schedule == "A")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
A_Replication <- tmp_d[[1]]
A_Discovery <- tmp_d[[2]]

# Schedule B
dset2use <- subset(asd_df, Schedule == "B")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
B_Replication <- tmp_d[[1]]
B_Discovery <- tmp_d[[2]]

# Schedule C
dset2use <- subset(asd_df, Schedule == "C")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
C_Replication <- tmp_d[[1]]
C_Discovery <- tmp_d[[2]]

# Schedule D
dset2use <- subset(asd_df, Schedule == "D")
tmp_d <- SplitDatasetsBySex(dset2use, rngSeed = rngSeed)
D_Replication <- tmp_d[[1]]
D_Discovery <- tmp_d[[2]]

# stack the per-schedule halves
df_Disc <- rbind(A_Discovery, B_Discovery, C_Discovery, D_Discovery)
df_Rep <- rbind(A_Replication, B_Replication, C_Replication, D_Replication)

# Discovery counts per Schedule x Centre
a <- table(df_Disc$Schedule, df_Disc$Centre)
print(a)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 6 17 10 7
## B 9 16 14 3
## C 6 9 14 2
## D 0 10 10 0
# Replication counts per Schedule x Centre
b <- table(df_Rep$Schedule, df_Rep$Centre)
print(b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A 7 18 10 5
## B 8 17 13 5
## C 6 10 13 3
## D 0 9 9 0
# cell-wise Discovery minus Replication counts
print(a - b)
##
## CAMBRIDGE KINGS_COLLEGE NIJMEGEN UTRECHT
## A -1 -1 0 2
## B 1 -1 1 -2
## C 0 -1 1 -1
## D 0 1 1 0
# total absolute difference in counts between the two halves
print(sum(abs(a - b)))
## [1] 14
# Flag which half each participant fell into; rows whose subid is in neither
# df_Disc nor df_Rep keep NA.
data2write$dataset <- NA
mask <- data2write$subid %in% df_Disc$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% df_Rep$subid
data2write$dataset[mask] <- "Replication"

# ASD rows of each half
asd_Disc <- subset(data2write, dataset == "Discovery" & Diagnosis == "ASD")
asd_Rep <- subset(data2write, dataset == "Replication" & Diagnosis == "ASD")
# # find which seed gives best TD age-match -------------------------------------
# seeds = 1:1000
# pvals = data.frame(matrix(nrow = length(seeds), ncol = 2))
# for (i in 1:length(seeds)) {
# res = findTDAgeMatch(data2write, seed_range = c(seeds[i],seeds[i]))
# td_Disc_matched = res[[2]]
# td_Rep_matched = res[[1]]
# tres = t.test(td_Disc_matched$age, asd_Disc$age)
# pvals[i,1] = tres$p.value
# tres = t.test(td_Rep_matched$age, asd_Rep$age)
# pvals[i,2] = tres$p.value
# #print(i)
# }
# a = sort.int(pvals[,1], decreasing = TRUE, index.return = TRUE)
# b=pvals[a$ix,]
# TD matching with a fixed seed (the seed search above is disabled).
# findTDAgeMatch() receives the same seed as both ends of seed_range.
seed2use <- 929
res <- findTDAgeMatch(data2write, seed_range = rep(seed2use, 2))
# element 2 of the result is used as Discovery, element 1 as Replication
td_Rep_matched <- res[[1]]
td_Disc_matched <- res[[2]]

# record the half for the matched TD rows
mask <- data2write$subid %in% td_Disc_matched$subid
data2write$dataset[mask] <- "Discovery"
mask <- data2write$subid %in% td_Rep_matched$subid
data2write$dataset[mask] <- "Replication"

# subgroup counts per half
print(table(data2write$dataset, data2write$subgrp))
##
## RRB_over_SC SC_equal_RRB SC_over_RRB TD
## Discovery 6 77 50 121
## Replication 1 83 49 122
# save the merged table at the project root, tagged with the z threshold
fname2save <- here(
  sprintf("asd_subgrp_data_rsfmri_ALL_DSM5_diffzscoreGrps_z%s.csv", as.character(z_thresh))
)
write.csv(data2write, file = fname2save)
#------------------------------------------------------------------------------
# Descriptive stats
# measures that appear in both the working frame and the describeBy() input
measure_vars <- c("viq_all","piq_all","fsiq4_all",
                  "A_pct_severity","B_pct_severity",
                  "ADI_social_total","ADI_communication_total","ADI_RRB_total",
                  "ados_2_SA_CSS","ados_2_RRB_CSS",
                  "SRS_tscore","SRS_tscore_self","RBS_total","SSP_total",
                  "vabsdscoresc_dss","vabsdscoresd_dss","vabsdscoress_dss","vabsabcabc_standard")
df2use <- data2write[, c("subid","Diagnosis","dataset","Centre","meanFD","age","sex","subgrp", measure_vars)]

# 999 and 777 are recoded to NA in every selected column
# NOTE(review): presumably missing-data codes -- the comparison is applied to
# all columns, including subid/age/meanFD; confirm none can legitimately hold
# these values.
mask <- (df2use == 999) | (df2use == 777)
df2use[mask] <- NA

# age is divided by 365 (presumably days -> years; confirm units)
df2use$age <- df2use$age/365

# per subgroup x dataset summary; the grouping columns are part of the input
cols2use <- c("dataset","subgrp","age","meanFD", measure_vars)
res <- describeBy(x = df2use[,cols2use], group = c("subgrp","dataset"))
res
##
## Descriptive statistics by group
## subgrp: RRB_over_SC
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 6 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 6 NaN NA NA NaN NA Inf -Inf
## age 3 6 18.08 7.13 21.77 18.08 2.50 7.89 23.88
## meanFD 4 6 0.29 0.42 0.15 0.29 0.07 0.06 1.14
## viq_all 5 6 102.64 16.25 103.00 102.64 13.34 78.00 123.85
## piq_all 6 6 102.83 10.76 103.00 102.83 13.34 87.00 114.00
## fsiq4_all 7 6 102.67 14.63 103.00 102.67 14.83 80.00 118.01
## A_pct_severity 8 6 0.18 0.12 0.12 0.18 0.06 0.05 0.35
## B_pct_severity 9 6 0.47 0.09 0.48 0.47 0.11 0.36 0.59
## ADI_social_total 10 6 15.83 8.33 16.50 15.83 9.64 5.00 26.00
## ADI_communication_total 11 6 14.67 6.31 14.50 14.67 7.41 6.00 23.00
## ADI_RRB_total 12 6 9.00 1.26 9.50 9.00 0.74 7.00 10.00
## ados_2_SA_CSS 13 6 3.83 3.37 2.50 3.83 2.22 1.00 9.00
## ados_2_RRB_CSS 14 6 4.83 4.22 4.50 4.83 5.19 1.00 9.00
## SRS_tscore 15 3 71.33 16.65 66.00 71.33 11.86 58.00 90.00
## SRS_tscore_self 16 4 59.00 7.12 61.00 59.00 4.45 49.00 65.00
## RBS_total 17 3 17.33 10.69 15.00 17.33 10.38 8.00 29.00
## SSP_total 18 2 143.00 33.94 143.00 143.00 35.58 119.00 167.00
## vabsdscoresc_dss 19 4 86.50 20.68 82.00 86.50 14.83 67.00 115.00
## vabsdscoresd_dss 20 4 71.00 9.63 74.00 71.00 3.71 57.00 79.00
## vabsdscoress_dss 21 4 70.75 10.34 72.00 70.75 8.15 57.00 82.00
## vabsabcabc_standard 22 4 74.00 12.03 74.50 74.00 11.86 59.00 88.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 16.00 -0.52 -1.90 2.91
## meanFD 1.09 1.32 -0.16 0.17
## viq_all 45.85 -0.18 -1.59 6.64
## piq_all 27.00 -0.20 -1.80 4.39
## fsiq4_all 38.01 -0.31 -1.70 5.97
## A_pct_severity 0.30 0.42 -1.90 0.05
## B_pct_severity 0.23 0.05 -2.03 0.04
## ADI_social_total 21.00 -0.06 -2.00 3.40
## ADI_communication_total 17.00 -0.02 -1.77 2.58
## ADI_RRB_total 3.00 -0.49 -1.70 0.52
## ados_2_SA_CSS 8.00 0.51 -1.77 1.38
## ados_2_RRB_CSS 8.00 0.02 -2.29 1.72
## SRS_tscore 32.00 0.29 -2.33 9.61
## SRS_tscore_self 16.00 -0.50 -1.88 3.56
## RBS_total 21.00 0.21 -2.33 6.17
## SSP_total 48.00 0.00 -2.75 24.00
## vabsdscoresc_dss 48.00 0.42 -1.89 10.34
## vabsdscoresd_dss 22.00 -0.61 -1.76 4.81
## vabsdscoress_dss 25.00 -0.26 -1.87 5.17
## vabsabcabc_standard 29.00 -0.09 -1.94 6.01
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 77 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 77 NaN NA NA NaN NA Inf -Inf
## age 3 77 16.29 5.72 14.98 16.09 5.98 7.08 30.28
## meanFD 4 77 0.28 0.45 0.18 0.22 0.13 0.03 3.95
## viq_all 5 76 98.00 19.20 101.00 98.01 17.71 61.00 140.91
## piq_all 6 76 99.57 21.64 102.00 99.65 19.98 58.00 150.00
## fsiq4_all 7 77 98.94 18.89 102.50 99.34 19.88 60.00 143.00
## A_pct_severity 8 77 0.31 0.14 0.31 0.31 0.13 0.00 0.63
## B_pct_severity 9 77 0.29 0.16 0.28 0.28 0.17 0.01 0.69
## ADI_social_total 10 77 16.83 6.81 16.00 17.13 8.90 2.00 27.00
## ADI_communication_total 11 77 14.00 5.87 14.00 14.08 5.93 0.00 26.00
## ADI_RRB_total 12 77 5.08 2.54 5.00 5.06 2.97 0.00 12.00
## ados_2_SA_CSS 13 76 6.22 2.60 6.50 6.31 2.97 1.00 10.00
## ados_2_RRB_CSS 14 76 4.86 2.83 5.00 4.82 2.97 1.00 10.00
## SRS_tscore 15 67 72.04 12.35 74.00 72.33 14.83 45.00 95.00
## SRS_tscore_self 16 35 63.11 12.85 64.00 62.14 13.34 43.00 94.00
## RBS_total 17 64 18.83 16.40 17.00 16.42 14.08 0.00 90.00
## SSP_total 18 44 135.86 31.26 138.50 136.92 30.39 53.00 189.00
## vabsdscoresc_dss 19 74 73.65 17.51 75.00 74.23 11.86 21.00 122.00
## vabsdscoresd_dss 20 73 73.70 16.61 73.00 73.47 11.86 25.00 131.00
## vabsdscoress_dss 21 74 70.78 15.69 73.00 71.93 13.34 20.00 104.00
## vabsabcabc_standard 22 73 71.27 13.20 72.00 71.47 10.38 20.00 103.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.20 0.38 -0.76 0.65
## meanFD 3.92 6.92 52.84 0.05
## viq_all 79.91 -0.08 -0.73 2.20
## piq_all 92.00 -0.10 -0.56 2.48
## fsiq4_all 83.00 -0.22 -0.74 2.15
## A_pct_severity 0.63 0.07 -0.45 0.02
## B_pct_severity 0.68 0.42 -0.48 0.02
## ADI_social_total 25.00 -0.31 -1.01 0.78
## ADI_communication_total 26.00 -0.11 -0.73 0.67
## ADI_RRB_total 12.00 0.11 -0.51 0.29
## ados_2_SA_CSS 9.00 -0.32 -1.12 0.30
## ados_2_RRB_CSS 9.00 -0.24 -1.21 0.32
## SRS_tscore 50.00 -0.15 -0.95 1.51
## SRS_tscore_self 51.00 0.48 -0.39 2.17
## RBS_total 90.00 1.75 4.28 2.05
## SSP_total 136.00 -0.37 -0.39 4.71
## vabsdscoresc_dss 101.00 -0.41 1.24 2.04
## vabsdscoresd_dss 106.00 0.18 1.76 1.94
## vabsdscoress_dss 84.00 -0.74 0.69 1.82
## vabsabcabc_standard 83.00 -0.53 2.46 1.54
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Discovery
## vars n mean sd median trimmed mad min max
## dataset* 1 50 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 50 NaN NA NA NaN NA Inf -Inf
## age 3 50 16.26 5.03 15.88 15.96 4.52 7.78 29.40
## meanFD 4 50 0.23 0.24 0.14 0.17 0.10 0.04 1.08
## viq_all 5 49 96.93 17.95 98.00 96.50 17.25 65.55 142.00
## piq_all 6 49 99.20 20.87 102.00 100.51 19.27 52.43 136.38
## fsiq4_all 7 50 98.27 18.28 100.68 98.97 19.75 59.00 128.30
## A_pct_severity 8 50 0.46 0.13 0.46 0.46 0.11 0.19 0.82
## B_pct_severity 9 50 0.16 0.09 0.15 0.16 0.12 0.00 0.33
## ADI_social_total 10 50 18.52 6.44 19.50 19.07 6.67 3.00 28.00
## ADI_communication_total 11 50 14.54 5.03 15.00 14.75 5.19 2.00 24.00
## ADI_RRB_total 12 50 2.96 2.02 3.00 2.85 1.48 0.00 8.00
## ados_2_SA_CSS 13 48 6.31 2.54 7.00 6.42 2.97 1.00 10.00
## ados_2_RRB_CSS 14 48 4.71 2.79 5.50 4.60 2.22 1.00 10.00
## SRS_tscore 15 44 73.23 10.97 74.50 73.89 8.15 44.00 90.00
## SRS_tscore_self 16 21 61.71 9.86 61.00 60.94 8.90 42.00 89.00
## RBS_total 17 43 14.72 13.27 12.00 13.11 13.34 0.00 54.00
## SSP_total 18 33 138.64 28.33 139.00 139.15 37.06 78.00 183.00
## vabsdscoresc_dss 19 45 69.29 16.26 71.00 71.16 10.38 21.00 99.00
## vabsdscoresd_dss 20 45 68.31 16.18 66.00 68.68 11.86 17.00 112.00
## vabsdscoress_dss 21 45 66.24 15.96 68.00 68.08 14.83 20.00 95.00
## vabsabcabc_standard 22 45 65.71 15.45 68.00 67.62 10.38 6.00 91.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 21.62 0.53 -0.05 0.71
## meanFD 1.04 2.20 4.21 0.03
## viq_all 76.45 0.21 -0.40 2.56
## piq_all 83.96 -0.57 -0.42 2.98
## fsiq4_all 69.30 -0.30 -0.92 2.59
## A_pct_severity 0.63 0.14 0.06 0.02
## B_pct_severity 0.33 0.10 -1.12 0.01
## ADI_social_total 25.00 -0.63 -0.45 0.91
## ADI_communication_total 22.00 -0.37 -0.39 0.71
## ADI_RRB_total 8.00 0.56 -0.31 0.29
## ados_2_SA_CSS 9.00 -0.46 -0.85 0.37
## ados_2_RRB_CSS 9.00 -0.15 -1.12 0.40
## SRS_tscore 46.00 -0.59 0.15 1.65
## SRS_tscore_self 47.00 0.70 0.93 2.15
## RBS_total 54.00 1.14 1.05 2.02
## SSP_total 105.00 -0.06 -1.14 4.93
## vabsdscoresc_dss 78.00 -1.26 2.12 2.42
## vabsdscoresd_dss 95.00 -0.37 1.76 2.41
## vabsdscoress_dss 75.00 -1.11 1.34 2.38
## vabsabcabc_standard 85.00 -1.83 4.55 2.30
## ------------------------------------------------------------
## subgrp: TD
## dataset: Discovery
## vars n mean sd median trimmed mad min
## dataset* 1 121 NaN NA NA NaN NA Inf
## subgrp* 2 121 NaN NA NA NaN NA Inf
## age 3 121 16.83 5.23 16.65 16.73 5.69 7.22
## meanFD 4 121 0.18 0.15 0.13 0.15 0.07 0.03
## viq_all 5 119 104.52 19.70 105.00 105.03 19.27 46.00
## piq_all 6 119 106.10 19.47 107.00 107.53 17.79 49.00
## fsiq4_all 7 119 105.72 18.33 108.18 106.99 16.58 53.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf
## SRS_tscore 15 68 47.84 9.40 45.00 46.32 5.19 37.00
## SRS_tscore_self 16 71 46.69 4.85 46.00 46.26 4.45 39.00
## RBS_total 17 68 2.15 4.74 0.00 0.95 0.00 0.00
## SSP_total 18 59 177.86 12.71 182.00 179.78 8.90 122.00
## vabsdscoresc_dss 19 34 91.97 25.44 99.50 93.50 21.50 21.00
## vabsdscoresd_dss 20 34 90.74 20.28 98.50 92.25 17.05 33.00
## vabsdscoress_dss 21 34 96.21 23.67 102.50 98.57 21.50 33.00
## vabsabcabc_standard 22 34 92.06 23.04 99.50 93.89 15.57 25.00
## max range skew kurtosis se
## dataset* -Inf -Inf NA NA NA
## subgrp* -Inf -Inf NA NA NA
## age 29.84 22.62 0.17 -0.51 0.48
## meanFD 0.85 0.82 2.28 5.65 0.01
## viq_all 160.00 114.00 -0.24 0.38 1.81
## piq_all 147.00 98.00 -0.69 0.32 1.79
## fsiq4_all 142.00 89.00 -0.69 0.58 1.68
## A_pct_severity -Inf -Inf NA NA NA
## B_pct_severity -Inf -Inf NA NA NA
## ADI_social_total -Inf -Inf NA NA NA
## ADI_communication_total -Inf -Inf NA NA NA
## ADI_RRB_total -Inf -Inf NA NA NA
## ados_2_SA_CSS -Inf -Inf NA NA NA
## ados_2_RRB_CSS -Inf -Inf NA NA NA
## SRS_tscore 76.00 39.00 1.54 1.44 1.14
## SRS_tscore_self 63.00 24.00 0.93 1.10 0.58
## RBS_total 27.00 27.00 3.13 10.87 0.57
## SSP_total 190.00 68.00 -1.90 4.85 1.66
## vabsdscoresc_dss 138.00 117.00 -0.71 0.36 4.36
## vabsdscoresd_dss 121.00 88.00 -0.80 0.07 3.48
## vabsdscoress_dss 129.00 96.00 -0.83 -0.31 4.06
## vabsabcabc_standard 127.00 102.00 -0.90 0.30 3.95
## ------------------------------------------------------------
## subgrp: RRB_over_SC
## dataset: Replication
## vars n mean sd median trimmed mad min max range
## dataset* 1 1 NaN NA NA NaN NA Inf -Inf -Inf
## subgrp* 2 1 NaN NA NA NaN NA Inf -Inf -Inf
## age 3 1 11.45 NA 11.45 11.45 0 11.45 11.45 0
## meanFD 4 1 0.38 NA 0.38 0.38 0 0.38 0.38 0
## viq_all 5 1 143.00 NA 143.00 143.00 0 143.00 143.00 0
## piq_all 6 1 148.00 NA 148.00 148.00 0 148.00 148.00 0
## fsiq4_all 7 1 148.00 NA 148.00 148.00 0 148.00 148.00 0
## A_pct_severity 8 1 0.15 NA 0.15 0.15 0 0.15 0.15 0
## B_pct_severity 9 1 0.40 NA 0.40 0.40 0 0.40 0.40 0
## ADI_social_total 10 1 16.00 NA 16.00 16.00 0 16.00 16.00 0
## ADI_communication_total 11 1 6.00 NA 6.00 6.00 0 6.00 6.00 0
## ADI_RRB_total 12 1 7.00 NA 7.00 7.00 0 7.00 7.00 0
## ados_2_SA_CSS 13 1 5.00 NA 5.00 5.00 0 5.00 5.00 0
## ados_2_RRB_CSS 14 1 1.00 NA 1.00 1.00 0 1.00 1.00 0
## SRS_tscore 15 1 60.00 NA 60.00 60.00 0 60.00 60.00 0
## SRS_tscore_self 16 0 NaN NA NA NaN NA Inf -Inf -Inf
## RBS_total 17 1 13.00 NA 13.00 13.00 0 13.00 13.00 0
## SSP_total 18 1 153.00 NA 153.00 153.00 0 153.00 153.00 0
## vabsdscoresc_dss 19 1 99.00 NA 99.00 99.00 0 99.00 99.00 0
## vabsdscoresd_dss 20 1 74.00 NA 74.00 74.00 0 74.00 74.00 0
## vabsdscoress_dss 21 1 76.00 NA 76.00 76.00 0 76.00 76.00 0
## vabsabcabc_standard 22 1 81.00 NA 81.00 81.00 0 81.00 81.00 0
## skew kurtosis se
## dataset* NA NA NA
## subgrp* NA NA NA
## age NA NA NA
## meanFD NA NA NA
## viq_all NA NA NA
## piq_all NA NA NA
## fsiq4_all NA NA NA
## A_pct_severity NA NA NA
## B_pct_severity NA NA NA
## ADI_social_total NA NA NA
## ADI_communication_total NA NA NA
## ADI_RRB_total NA NA NA
## ados_2_SA_CSS NA NA NA
## ados_2_RRB_CSS NA NA NA
## SRS_tscore NA NA NA
## SRS_tscore_self NA NA NA
## RBS_total NA NA NA
## SSP_total NA NA NA
## vabsdscoresc_dss NA NA NA
## vabsdscoresd_dss NA NA NA
## vabsdscoress_dss NA NA NA
## vabsabcabc_standard NA NA NA
## ------------------------------------------------------------
## subgrp: SC_equal_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 83 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 83 NaN NA NA NaN NA Inf -Inf
## age 3 83 16.51 5.67 16.33 16.24 6.06 7.12 30.15
## meanFD 4 83 0.25 0.27 0.18 0.19 0.10 0.05 1.60
## viq_all 5 82 102.11 16.52 102.73 102.78 18.21 62.90 133.00
## piq_all 6 82 103.83 18.60 106.00 105.21 19.27 52.00 134.00
## fsiq4_all 7 82 103.28 16.55 106.38 104.15 17.05 64.00 131.00
## A_pct_severity 8 83 0.28 0.13 0.26 0.27 0.12 0.04 0.65
## B_pct_severity 9 83 0.23 0.13 0.23 0.23 0.13 0.00 0.67
## ADI_social_total 10 83 14.96 5.92 15.00 15.21 5.93 1.00 27.00
## ADI_communication_total 11 83 11.84 5.36 11.00 11.82 5.93 0.00 24.00
## ADI_RRB_total 12 83 3.94 2.29 4.00 3.87 2.97 0.00 9.00
## ados_2_SA_CSS 13 81 5.68 2.51 6.00 5.74 2.97 1.00 10.00
## ados_2_RRB_CSS 14 81 4.91 2.52 5.00 4.91 1.48 1.00 10.00
## SRS_tscore 15 76 67.13 11.61 68.00 67.06 13.34 43.00 90.00
## SRS_tscore_self 16 40 61.90 7.86 61.50 61.66 6.67 46.00 84.00
## RBS_total 17 73 13.37 11.30 10.00 11.92 10.38 0.00 52.00
## SSP_total 18 50 142.44 26.14 142.50 144.18 33.36 69.00 184.00
## vabsdscoresc_dss 19 76 82.01 13.81 80.00 81.24 12.60 50.00 122.00
## vabsdscoresd_dss 20 75 78.24 15.27 77.00 77.59 13.34 38.00 119.00
## vabsdscoress_dss 21 76 75.91 15.22 77.00 76.52 11.86 28.00 112.00
## vabsabcabc_standard 22 75 77.09 12.83 77.00 76.74 8.90 39.00 117.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 23.03 0.35 -0.56 0.62
## meanFD 1.55 3.50 13.51 0.03
## viq_all 70.10 -0.30 -0.77 1.82
## piq_all 82.00 -0.63 -0.05 2.05
## fsiq4_all 67.00 -0.44 -0.55 1.83
## A_pct_severity 0.61 0.45 0.09 0.01
## B_pct_severity 0.67 0.60 0.45 0.01
## ADI_social_total 26.00 -0.31 -0.61 0.65
## ADI_communication_total 24.00 0.06 -0.62 0.59
## ADI_RRB_total 9.00 0.27 -0.49 0.25
## ados_2_SA_CSS 9.00 -0.22 -0.82 0.28
## ados_2_RRB_CSS 9.00 -0.40 -0.78 0.28
## SRS_tscore 47.00 0.03 -0.89 1.33
## SRS_tscore_self 38.00 0.42 0.53 1.24
## RBS_total 52.00 1.26 1.53 1.32
## SSP_total 115.00 -0.56 -0.40 3.70
## vabsdscoresc_dss 72.00 0.56 0.23 1.58
## vabsdscoresd_dss 81.00 0.31 0.29 1.76
## vabsdscoress_dss 84.00 -0.55 1.08 1.75
## vabsabcabc_standard 78.00 0.24 1.12 1.48
## ------------------------------------------------------------
## subgrp: SC_over_RRB
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 49 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 49 NaN NA NA NaN NA Inf -Inf
## age 3 49 16.30 5.21 15.78 16.09 5.99 8.29 29.23
## meanFD 4 49 0.23 0.21 0.16 0.20 0.11 0.04 1.31
## viq_all 5 46 93.12 19.32 96.64 94.25 17.93 50.91 127.00
## piq_all 6 48 96.22 21.60 99.50 97.14 20.71 44.03 138.00
## fsiq4_all 7 47 95.43 20.11 101.36 95.83 19.81 59.00 139.00
## A_pct_severity 8 49 0.51 0.13 0.52 0.51 0.14 0.27 0.75
## B_pct_severity 9 49 0.20 0.12 0.21 0.19 0.13 0.00 0.47
## ADI_social_total 10 49 19.39 5.87 20.00 19.71 5.93 6.00 29.00
## ADI_communication_total 11 49 15.61 4.64 16.00 15.83 4.45 4.00 24.00
## ADI_RRB_total 12 49 3.88 2.54 3.00 3.76 2.97 0.00 10.00
## ados_2_SA_CSS 13 46 6.20 2.98 6.00 6.29 4.45 1.00 10.00
## ados_2_RRB_CSS 14 46 4.50 2.83 5.00 4.39 2.97 1.00 9.00
## SRS_tscore 15 41 76.61 10.61 80.00 77.48 13.34 51.00 90.00
## SRS_tscore_self 16 23 62.65 10.72 62.00 62.63 10.38 40.00 84.00
## RBS_total 17 41 21.59 15.81 18.00 19.94 11.86 1.00 73.00
## SSP_total 18 32 134.62 24.70 138.50 134.31 25.20 91.00 181.00
## vabsdscoresc_dss 19 44 69.50 14.57 69.00 70.47 11.86 21.00 100.00
## vabsdscoresd_dss 20 44 68.27 15.83 66.50 67.69 12.60 42.00 118.00
## vabsdscoress_dss 21 44 63.43 15.88 63.50 63.53 14.83 23.00 100.00
## vabsabcabc_standard 22 44 65.25 13.82 65.00 65.61 8.90 28.00 94.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 20.94 0.43 -0.77 0.74
## meanFD 1.26 2.93 11.01 0.03
## viq_all 76.09 -0.54 -0.59 2.85
## piq_all 93.97 -0.42 -0.63 3.12
## fsiq4_all 80.00 -0.25 -0.85 2.93
## A_pct_severity 0.48 -0.14 -0.93 0.02
## B_pct_severity 0.47 0.30 -0.77 0.02
## ADI_social_total 23.00 -0.56 -0.53 0.84
## ADI_communication_total 20.00 -0.47 -0.51 0.66
## ADI_RRB_total 10.00 0.46 -0.72 0.36
## ados_2_SA_CSS 9.00 -0.15 -1.39 0.44
## ados_2_RRB_CSS 8.00 -0.09 -1.36 0.42
## SRS_tscore 39.00 -0.56 -0.71 1.66
## SRS_tscore_self 44.00 0.02 -0.48 2.23
## RBS_total 72.00 1.06 0.87 2.47
## SSP_total 90.00 -0.09 -0.89 4.37
## vabsdscoresc_dss 79.00 -1.01 2.18 2.20
## vabsdscoresd_dss 76.00 0.65 0.49 2.39
## vabsdscoress_dss 77.00 -0.11 -0.03 2.39
## vabsabcabc_standard 66.00 -0.36 0.79 2.08
## ------------------------------------------------------------
## subgrp: TD
## dataset: Replication
## vars n mean sd median trimmed mad min max
## dataset* 1 122 NaN NA NA NaN NA Inf -Inf
## subgrp* 2 122 NaN NA NA NaN NA Inf -Inf
## age 3 122 16.86 6.07 16.34 16.58 7.59 6.89 29.72
## meanFD 4 122 0.23 0.46 0.14 0.15 0.07 0.04 4.60
## viq_all 5 122 104.02 17.58 108.18 105.64 12.13 45.00 140.00
## piq_all 6 122 104.64 18.41 108.96 106.56 14.08 49.00 139.00
## fsiq4_all 7 122 104.94 17.14 108.09 107.29 11.86 50.00 134.00
## A_pct_severity 8 0 NaN NA NA NaN NA Inf -Inf
## B_pct_severity 9 0 NaN NA NA NaN NA Inf -Inf
## ADI_social_total 10 0 NaN NA NA NaN NA Inf -Inf
## ADI_communication_total 11 0 NaN NA NA NaN NA Inf -Inf
## ADI_RRB_total 12 0 NaN NA NA NaN NA Inf -Inf
## ados_2_SA_CSS 13 0 NaN NA NA NaN NA Inf -Inf
## ados_2_RRB_CSS 14 0 NaN NA NA NaN NA Inf -Inf
## SRS_tscore 15 65 47.23 9.34 44.00 45.66 4.45 37.00 90.00
## SRS_tscore_self 16 61 48.44 6.84 47.00 47.63 5.93 39.00 69.00
## RBS_total 17 63 3.08 11.54 0.00 0.86 0.00 0.00 89.00
## SSP_total 18 54 174.93 19.38 182.00 178.41 6.67 75.00 190.00
## vabsdscoresc_dss 19 39 92.74 25.45 96.00 95.82 20.76 21.00 125.00
## vabsdscoresd_dss 20 39 91.10 22.65 97.00 93.91 14.83 27.00 122.00
## vabsdscoress_dss 21 39 98.90 27.04 103.00 102.12 17.79 20.00 132.00
## vabsabcabc_standard 22 38 93.00 25.39 100.00 96.44 15.57 20.00 126.00
## range skew kurtosis se
## dataset* -Inf NA NA NA
## subgrp* -Inf NA NA NA
## age 22.83 0.33 -0.97 0.55
## meanFD 4.56 7.54 64.83 0.04
## viq_all 95.00 -0.99 1.51 1.59
## piq_all 90.00 -0.93 0.67 1.67
## fsiq4_all 84.00 -1.28 1.67 1.55
## A_pct_severity -Inf NA NA NA
## B_pct_severity -Inf NA NA NA
## ADI_social_total -Inf NA NA NA
## ADI_communication_total -Inf NA NA NA
## ADI_RRB_total -Inf NA NA NA
## ados_2_SA_CSS -Inf NA NA NA
## ados_2_RRB_CSS -Inf NA NA NA
## SRS_tscore 53.00 2.14 5.82 1.16
## SRS_tscore_self 30.00 1.05 0.48 0.88
## RBS_total 89.00 6.60 45.89 1.45
## SSP_total 115.00 -2.88 10.92 2.64
## vabsdscoresc_dss 104.00 -1.21 1.00 4.08
## vabsdscoresd_dss 95.00 -1.34 1.26 3.63
## vabsdscoress_dss 112.00 -1.26 1.10 4.33
## vabsabcabc_standard 106.00 -1.42 1.40 4.12
#------------------------------------------------------------------------------
# Subtype x sex contingency tables, with a chi-squared test of independence
# run separately in each dataset.
# NOTE(review): the RRB_over_SC cells are very small in the printed tables
# (3/3 and 0/1), so the chi-squared approximation may be unreliable here.
# Discovery
data2use <- data2write[which(data2write$dataset == "Discovery"), , drop = FALSE]
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 3 3
## SC_equal_RRB 18 59
## SC_over_RRB 14 36
## TD 41 80
cs_res <- chisq.test(data2use$subgrp, data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 3.7208, df = 3, p-value = 0.2932
# Replication
data2use <- data2write[which(data2write$dataset == "Replication"), , drop = FALSE]
table(data2use$subgrp, data2use$sex)
##
## Female Male
## RRB_over_SC 0 1
## SC_equal_RRB 23 60
## SC_over_RRB 13 36
## TD 47 75
cs_res <- chisq.test(data2use$subgrp, data2use$sex)
cs_res
##
## Pearson's Chi-squared test
##
## data: data2use$subgrp and data2use$sex
## X-squared = 4.1601, df = 3, p-value = 0.2447
#------------------------------------------------------------------------------
# Subtype comparisons for every variable in vars2analyze.
# For each variable the loop:
#   1. fits a mixed model (fixed effect: subgrp; random intercept: Centre) on
#      all subgroups, separately for Discovery and Replication, and stores the
#      omnibus F and p for subgrp;
#   2. refits the model on SC_equal_RRB vs SC_over_RRB only (TD and RRB_over_SC
#      removed) and stores F, t, p and Cohen's d per dataset;
#   3. computes a replication Bayes factor from the Discovery and Replication
#      t-statistics of step 2;
#   4. prints a jitter + boxplot of the variable by subgroup, faceted by dataset.
# Results are collected in output_res (one row per variable).
vars2analyze <- c("age","meanFD","viq_all","piq_all","fsiq4_all",
                  "A_pct_severity","B_pct_severity",
                  "ADI_social_total","ADI_communication_total","ADI_RRB_total",
                  "ados_2_SA_CSS","ados_2_RRB_CSS",
                  "SRS_tscore_self","RBS_total","SSP_total",
                  "vabsdscoress_dss","vabsdscoresd_dss","vabsdscoresc_dss","vabsabcabc_standard")
# y-axis labels, index-aligned with vars2analyze
vnames <- c("Age","Mean FD","VIQ","PIQ","FIQ","ADI-R SC","ADI-R RRB",
            "ADI-R Social","ADI-R Communication","ADI-R RRB",
            "ADOS SA CSS","ADOS RRB CSS","SRS","RBS","SSP",
            "Vineland Socialization","Vineland Daily Living Skills",
            "Vineland Communication","Vineland ABC")
cnames <- c("All_Disc.fstat","All_Disc.pval","All_Rep.fstat","All_Rep.pval",
            "SCequalRRB_vs_SCoverRRB_Disc.fstat","SCequalRRB_vs_SCoverRRB_Disc.tstat",
            "SCequalRRB_vs_SCoverRRB_Disc.pval","SCequalRRB_vs_SCoverRRB_Disc.es",
            "SCequalRRB_vs_SCoverRRB_Rep.fstat","SCequalRRB_vs_SCoverRRB_Rep.tstat",
            "SCequalRRB_vs_SCoverRRB_Rep.pval","SCequalRRB_vs_SCoverRRB_Rep.es",
            "SCequalRRB_vs_SCoverRRB.repBF")
output_res <- data.frame(matrix(nrow = length(vars2analyze), ncol = length(cnames)))
colnames(output_res) <- cnames
rownames(output_res) <- vars2analyze
output_res$varNames <- vars2analyze

for (ivar in seq_along(vars2analyze)){
  y_var <- vars2analyze[ivar]
  # print(y_var)

  # The same model is used for all four fits of this variable:
  # y_var ~ subgrp with a random intercept for Centre.
  fx_form <- as.formula(sprintf("%s ~ %s",y_var,"subgrp"))
  rx_form <- as.formula(sprintf("~ 1|%s","Centre"))

  #----------------------------------------------------------------------------
  # Discovery, all subgroups
  df4mod <- subset(df2use, df2use$dataset=="Discovery")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"All_Disc.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"All_Disc.pval"] <- res["subgrp","p-value"]

  #----------------------------------------------------------------------------
  # Replication, all subgroups
  df4mod <- subset(df2use, df2use$dataset=="Replication")
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"All_Rep.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"All_Rep.pval"] <- res["subgrp","p-value"]

  #----------------------------------------------------------------------------
  # Discovery, SC_equal_RRB vs SC_over_RRB
  df4mod <- subset(df2use, df2use$dataset=="Discovery" & !is.element(df2use$subgrp,c("TD","RRB_over_SC")))
  # Group sizes for the Bayes factor: count only subjects with an observed
  # y_var, since rows with missing y_var are dropped from the model (na.omit)
  # and therefore do not contribute to the t-statistic.
  has_y <- !is.na(df4mod[[y_var]])
  n1 <- sum(df4mod$subgrp=="SC_equal_RRB" & has_y)
  m1 <- sum(df4mod$subgrp=="SC_over_RRB" & has_y)
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.pval"] <- res["subgrp","p-value"]
  res <- summary(mod2use)
  # row 2 of the t-table is the subgrp coefficient (row 1 is the intercept)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.tstat"] <- res$tTable[2,"t-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.es"] <- cohens_d(df4mod[df4mod$subgrp=="SC_equal_RRB",y_var],
                                                                  df4mod[df4mod$subgrp=="SC_over_RRB",y_var])

  #----------------------------------------------------------------------------
  # Replication, SC_equal_RRB vs SC_over_RRB
  df4mod <- subset(df2use, df2use$dataset=="Replication" & !is.element(df2use$subgrp,c("TD","RRB_over_SC")))
  has_y <- !is.na(df4mod[[y_var]])
  n2 <- sum(df4mod$subgrp=="SC_equal_RRB" & has_y)
  m2 <- sum(df4mod$subgrp=="SC_over_RRB" & has_y)
  mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
  # run ANOVA
  res <- anova(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.fstat"] <- res["subgrp","F-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.pval"] <- res["subgrp","p-value"]
  res <- summary(mod2use)
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.tstat"] <- res$tTable[2,"t-value"]
  output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.es"] <- cohens_d(df4mod[df4mod$subgrp=="SC_equal_RRB",y_var],
                                                                 df4mod[df4mod$subgrp=="SC_over_RRB",y_var])

  #----------------------------------------------------------------------------
  # Replication Bayes Factor
  # tobs is the Discovery t-statistic and trep the Replication t-statistic.
  # (The Discovery t-statistic used to be passed for both, which made the BF
  # compare Discovery with itself. Any repBF values rendered from the earlier
  # code need to be regenerated.)
  res_bf <- BFSALL(tobs = output_res[y_var,"SCequalRRB_vs_SCoverRRB_Disc.tstat"],
                   trep = output_res[y_var,"SCequalRRB_vs_SCoverRRB_Rep.tstat"],
                   n1 = n1,
                   n2 = n2,
                   m1 = m1,
                   m2 = m2,
                   sample = 2,
                   Type = 'ALL')
  output_res[y_var,"SCequalRRB_vs_SCoverRRB.repBF"] <- res_bf["Replication BF","Replication 1"]

  # make a plot (RRB_over_SC is left out of the figure)
  colors2use <- get_ggColorHue(3)
  df4plot <- subset(df2use, df2use$subgrp!="RRB_over_SC")
  p <- ggplot(data = df4plot, aes_string(x = "subgrp", y = y_var, colour = "subgrp")) + facet_grid(. ~ dataset)
  p <- p + geom_jitter() + geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA)
  p <- p + ylab(vnames[ivar]) + xlab("Group") +
    scale_x_discrete(labels=c("SC_equal_RRB"="SC=RRB","SC_over_RRB"="SC>RRB","TD"="TD")) +
    scale_colour_manual(values = c(colors2use[2:3],"grey60")) + guides(colour=FALSE) +
    theme(text = element_text(size=fontSize-5),
          axis.text.x = element_text(size=fontSize-5),
          axis.text.y = element_text(size=fontSize-5))
  print(p)
}

# Store the effect sizes for this z-threshold (row "1") for the threshold plots
vabc["1","Discovery"] <- output_res["vabsabcabc_standard","SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc["1","Replication"] <- output_res["vabsabcabc_standard","SCequalRRB_vs_SCoverRRB_Rep.es"]
vabc_dls["1","Discovery"] <- output_res["vabsdscoresd_dss","SCequalRRB_vs_SCoverRRB_Disc.es"]
vabc_dls["1","Replication"] <- output_res["vabsdscoresd_dss","SCequalRRB_vs_SCoverRRB_Rep.es"]
output_res
## All_Disc.fstat All_Disc.pval All_Rep.fstat All_Rep.pval
## age 0.3409502 7.957351e-01 0.6350975 5.930333e-01
## meanFD 2.1793571 9.102259e-02 0.1076975 9.555433e-01
## viq_all 2.2881571 7.915093e-02 5.1067969 1.915639e-03
## piq_all 1.6155392 1.863606e-01 2.6993027 4.636135e-02
## fsiq4_all 2.3403632 7.396269e-02 3.9938050 8.405111e-03
## A_pct_severity 29.1910165 3.708867e-11 54.6027167 0.000000e+00
## B_pct_severity 24.8789326 7.638108e-10 1.9898230 1.409594e-01
## ADI_social_total 2.4614717 8.937070e-02 9.7525012 1.147904e-04
## ADI_communication_total 0.6602227 5.185006e-01 10.7400003 4.904794e-05
## ADI_RRB_total 24.9062961 7.489428e-10 0.6930022 5.019534e-01
## ados_2_SA_CSS 2.4392023 9.140608e-02 0.8423143 4.332017e-01
## ados_2_RRB_CSS 0.1235088 8.839224e-01 1.6540440 1.955349e-01
## SRS_tscore_self 36.4235907 0.000000e+00 48.1766540 5.551115e-16
## RBS_total 19.8557033 4.405487e-11 16.9467104 1.121701e-09
## SSP_total 30.5392542 5.218048e-15 24.2225110 1.689981e-12
## vabsdscoress_dss 23.3455960 1.858957e-12 24.2020517 7.137624e-13
## vabsdscoresd_dss 12.6970004 1.951534e-07 10.6333916 2.184428e-06
## vabsdscoresc_dss 10.3252985 3.216407e-06 11.4872327 7.792022e-07
## vabsabcabc_standard 19.0024720 1.726517e-10 17.3725254 9.638763e-10
## SCequalRRB_vs_SCoverRRB_Disc.fstat
## age 0.002926778
## meanFD 0.561915762
## viq_all 0.144543558
## piq_all 0.005605256
## fsiq4_all 0.038733646
## A_pct_severity 41.199213972
## B_pct_severity 33.793235961
## ADI_social_total 2.723119508
## ADI_communication_total 0.941846172
## ADI_RRB_total 26.725139105
## ados_2_SA_CSS 0.060358486
## ados_2_RRB_CSS 0.206277233
## SRS_tscore_self 0.014011628
## RBS_total 1.930634341
## SSP_total 0.679098271
## vabsdscoress_dss 2.829696399
## vabsdscoresd_dss 3.318283400
## vabsdscoresc_dss 1.832087369
## vabsabcabc_standard 4.440678576
## SCequalRRB_vs_SCoverRRB_Disc.tstat
## age -0.05409971
## meanFD -0.74961041
## viq_all -0.38018885
## piq_all -0.07486826
## fsiq4_all -0.19680865
## A_pct_severity 6.41866138
## B_pct_severity -5.81319499
## ADI_social_total 1.65018772
## ADI_communication_total 0.97048759
## ADI_RRB_total -5.16963626
## ados_2_SA_CSS 0.24567964
## ados_2_RRB_CSS -0.45417754
## SRS_tscore_self 0.11837072
## RBS_total -1.38947268
## SSP_total 0.82407419
## vabsdscoress_dss -1.68217015
## vabsdscoresd_dss -1.82161560
## vabsdscoresc_dss -1.35354622
## vabsabcabc_standard -2.10729176
## SCequalRRB_vs_SCoverRRB_Disc.pval
## age 9.569442e-01
## meanFD 4.549324e-01
## viq_all 7.044773e-01
## piq_all 9.404442e-01
## fsiq4_all 8.443048e-01
## A_pct_severity 2.748584e-09
## B_pct_severity 5.016122e-08
## ADI_social_total 1.014768e-01
## ADI_communication_total 3.337236e-01
## ADI_RRB_total 9.297268e-07
## ados_2_SA_CSS 8.063535e-01
## ados_2_RRB_CSS 6.505284e-01
## SRS_tscore_self 9.062389e-01
## RBS_total 1.677147e-01
## SSP_total 4.126190e-01
## vabsdscoress_dss 9.527311e-02
## vabsdscoresd_dss 7.115856e-02
## vabsdscoresc_dss 1.785588e-01
## vabsabcabc_standard 3.730405e-02
## SCequalRRB_vs_SCoverRRB_Disc.es
## age 0.005202566
## meanFD 0.136146695
## viq_all 0.057261993
## piq_all 0.017377531
## fsiq4_all 0.035745032
## A_pct_severity -1.068087853
## B_pct_severity 0.958629595
## ADI_social_total -0.253239685
## ADI_communication_total -0.097220221
## ADI_RRB_total 0.900500211
## ados_2_SA_CSS -0.034527370
## ados_2_RRB_CSS 0.052238597
## SRS_tscore_self 0.118932130
## RBS_total 0.269321123
## SSP_total -0.091977701
## vabsdscoress_dss 0.287326559
## vabsdscoresd_dss 0.327620942
## vabsdscoresc_dss 0.255933511
## vabsabcabc_standard 0.393924828
## SCequalRRB_vs_SCoverRRB_Rep.fstat
## age 0.04586012
## meanFD 0.08589090
## viq_all 6.92792527
## piq_all 3.25026109
## fsiq4_all 4.54321880
## A_pct_severity 107.43254137
## B_pct_severity 2.41260784
## ADI_social_total 19.47713747
## ADI_communication_total 19.29022585
## ADI_RRB_total 0.00133106
## ados_2_SA_CSS 1.48802963
## ados_2_RRB_CSS 1.61507709
## SRS_tscore_self 0.10212748
## RBS_total 12.37692724
## SSP_total 2.33246365
## vabsdscoress_dss 18.79391008
## vabsdscoresd_dss 12.26257294
## vabsdscoresc_dss 21.97124650
## vabsabcabc_standard 24.02962953
## SCequalRRB_vs_SCoverRRB_Rep.tstat
## age -0.21414977
## meanFD -0.29307149
## viq_all -2.63209522
## piq_all -1.80284805
## fsiq4_all -2.13148277
## A_pct_severity 10.36496702
## B_pct_severity -1.55325717
## ADI_social_total 4.41329100
## ADI_communication_total 4.39206396
## ADI_RRB_total -0.03648369
## ados_2_SA_CSS 1.21984820
## ados_2_RRB_CSS -1.27085683
## SRS_tscore_self 0.31957391
## RBS_total 3.51808574
## SSP_total -1.52724054
## vabsdscoress_dss -4.33519435
## vabsdscoresd_dss -3.50179567
## vabsdscoresc_dss -4.68734962
## vabsabcabc_standard -4.90200260
## SCequalRRB_vs_SCoverRRB_Rep.pval
## age 8.307737e-01
## meanFD 7.699458e-01
## viq_all 9.573231e-03
## piq_all 7.382127e-02
## fsiq4_all 3.502273e-02
## A_pct_severity 0.000000e+00
## B_pct_severity 1.228502e-01
## ADI_social_total 2.151451e-05
## ADI_communication_total 2.342006e-05
## ADI_RRB_total 9.709540e-01
## ados_2_SA_CSS 2.248753e-01
## ados_2_RRB_CSS 2.061975e-01
## SRS_tscore_self 7.504396e-01
## RBS_total 6.351267e-04
## SSP_total 1.307987e-01
## vabsdscoress_dss 3.137227e-05
## vabsdscoresd_dss 6.608324e-04
## vabsdscoresc_dss 7.661567e-06
## vabsabcabc_standard 3.169900e-06
## SCequalRRB_vs_SCoverRRB_Rep.es
## age 0.03858047
## meanFD 0.05279873
## viq_all 0.51039535
## piq_all 0.38510505
## fsiq4_all 0.43724080
## A_pct_severity -1.79780755
## B_pct_severity 0.29551079
## ADI_social_total -0.74987778
## ADI_communication_total -0.73819834
## ADI_RRB_total 0.02607430
## ados_2_SA_CSS -0.19191587
## ados_2_RRB_CSS 0.15678698
## SRS_tscore_self -0.08339268
## RBS_total -0.62512296
## SSP_total 0.30503384
## vabsdscoress_dss 0.80642663
## vabsdscoresd_dss 0.64402872
## vabsdscoresc_dss 0.88770839
## vabsabcabc_standard 0.89696314
## SCequalRRB_vs_SCoverRRB.repBF varNames
## age 7.056294e-01 age
## meanFD 9.326246e-01 meanFD
## viq_all 7.561628e-01 viq_all
## piq_all 7.063480e-01 piq_all
## fsiq4_all 7.180131e-01 fsiq4_all
## A_pct_severity 4.435845e+07 A_pct_severity
## B_pct_severity 2.470910e+06 B_pct_severity
## ADI_social_total 2.727642e+00 ADI_social_total
## ADI_communication_total 1.128619e+00 ADI_communication_total
## ADI_RRB_total 1.390222e+05 ADI_RRB_total
## ados_2_SA_CSS 7.268164e-01 ados_2_SA_CSS
## ados_2_RRB_CSS 7.806136e-01 ados_2_RRB_CSS
## SRS_tscore_self 7.090095e-01 SRS_tscore_self
## RBS_total 1.840873e+00 RBS_total
## SSP_total 9.902609e-01 SSP_total
## vabsdscoress_dss 2.864643e+00 vabsdscoress_dss
## vabsdscoresd_dss 3.644820e+00 vabsdscoresd_dss
## vabsdscoresc_dss 1.753322e+00 vabsdscoresc_dss
## vabsabcabc_standard 6.296295e+00 vabsabcabc_standard
# # plot Vineland ABC effect sizes over thresholds
# tmp = data.frame(vabc)
# tmp$threshold = factor(rownames(tmp))
#
# df2plot = melt(tmp)
# p = ggplot(data = df2plot, aes(x = threshold, y =value, color=variable, group=variable)) + facet_grid(. ~ variable)
# p = p + geom_line(size=4) +
# geom_point(size=7) +
# ylab("Cohen's d") +
# xlab("Z-threshold") +
# ylim(0,1) +
# guides(color=FALSE) +
# theme(text = element_text(size=fontSize),
# axis.text.x = element_text(size=fontSize),
# axis.text.y = element_text(size=fontSize))
# p
# Vineland ABC: plot the values stored in vabc (labelled Cohen's d) against
# z-threshold (the row names of vabc), one facet per melted column.
tmp <- data.frame(vabc)
tmp$threshold <- factor(rownames(tmp))
df2plot <- melt(tmp)
p <- ggplot(data = df2plot,
            aes(x = threshold, y = value, color = variable, group = variable)) +
  facet_grid(. ~ variable) +
  geom_line(size = 4) +
  geom_point(size = 7) +
  ylab("Cohen's d") +
  xlab("Z-threshold") +
  ylim(0, 1) +
  scale_colour_manual(values = c("dodger blue", "#ff8d1e")) +
  guides(color = FALSE) +
  theme(text = element_text(size = fontSize),
        axis.text.x = element_text(size = fontSize),
        axis.text.y = element_text(size = fontSize))
p
#------------------------------------------------------------------------------
# model effect of z difference score on Vineland ABC
# For each dataset: fit y_var ~ z_ds with a random intercept for Centre, keep
# the t-statistic for z_ds and the number of non-missing observations, then
# combine Discovery and Replication in a replication Bayes factor.
y_var <- "vabsabcabc_standard"
# Discovery
df4mod <- subset(data2write, data2write$dataset=="Discovery")
# add each subject's z_ds from tmp_test (inner join on subid)
df4mod <- merge(df4mod, tmp_test[,c("subid","z_ds")], by = "subid")
# 999 is recoded to missing. %in% never returns NA, so this also works when
# the column already contains NA (an NA row index makes `[<-` on a data frame
# fail).
df4mod[df4mod[[y_var]] %in% 999, y_var] <- NA
# df4mod[df4mod[,y_var]==777,y_var] = NA
n_disc <- sum(!is.na(df4mod[,y_var]))
# construct linear model
# mixed-effect model: Centre as random intercept, z_ds as the fixed effect
fx_form <- as.formula(sprintf("%s ~ %s",y_var,"z_ds"))
rx_form <- as.formula(sprintf("~ 1|%s","Centre"))
mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
# run ANOVA
res <- anova(mod2use)
tres <- summary(mod2use)
t_disc <- tres$tTable["z_ds","t-value"]
res
## numDF denDF F-value p-value
## (Intercept) 1 117 576.3867 <.0001
## z_ds 1 117 4.4529 0.037
cor.test(df4mod[,y_var],df4mod$z_ds)
##
## Pearson's product-moment correlation
##
## data: df4mod[, y_var] and df4mod$z_ds
## t = -1.6818, df = 120, p-value = 0.0952
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.32085672 0.02673098
## sample estimates:
## cor
## -0.151751
# Replication
df4mod <- subset(data2write, data2write$dataset=="Replication")
df4mod <- merge(df4mod, tmp_test[,c("subid","z_ds")], by = "subid")
# NA-safe recode of the 999 missing code (see Discovery above)
df4mod[df4mod[[y_var]] %in% 999, y_var] <- NA
# df4mod[df4mod[,y_var]==777,y_var] = NA
n_rep <- sum(!is.na(df4mod[,y_var]))
# construct linear model
# mixed-effect model: Centre as random intercept, z_ds as the fixed effect
fx_form <- as.formula(sprintf("%s ~ %s",y_var,"z_ds"))
rx_form <- as.formula(sprintf("~ 1|%s","Centre"))
mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
# run ANOVA
res <- anova(mod2use)
tres <- summary(mod2use)
t_rep <- tres$tTable["z_ds","t-value"]
res
## numDF denDF F-value p-value
## (Intercept) 1 115 544.8477 <.0001
## z_ds 1 115 25.1670 <.0001
cor.test(df4mod[,y_var],df4mod$z_ds)
##
## Pearson's product-moment correlation
##
## data: df4mod[, y_var] and df4mod$z_ds
## t = -4.6877, df = 118, p-value = 7.474e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5373047 -0.2335690
## sample estimates:
## cor
## -0.3962224
# replication Bayes Factor
res_bf <- BFSALL(tobs = t_disc,
                 trep = t_rep,
                 n1 = n_disc,
                 n2 = n_rep,
                 sample = 1,
                 Type = 'ALL')
res_bf["Replication BF","Replication 1"]
## [1] 8931.714
# plot scatterplot of z_ds by Vineland ABC
# Uses y_var as set for the Vineland ABC model ("vabsabcabc_standard").
df4plot <- subset(data2write)
# add each subject's z_ds from tmp_test (inner join on subid)
df4plot <- merge(df4plot, tmp_test[,c("subid","z_ds")], by = "subid")
# 999 is recoded to missing. %in% never returns NA, so this also works when
# the column already contains NA (an NA row index makes `[<-` on a data frame
# fail).
df4plot[df4plot[[y_var]] %in% 999, y_var] <- NA
# df4plot[df4plot[,y_var]==777,y_var] = NA
p <- ggplot(data=df4plot, aes(x = z_ds, y = vabsabcabc_standard, colour = dataset)) + facet_grid(. ~ dataset)
p <- p + geom_point(size=3) + geom_smooth(method=lm) + xlab("Z SC-RRB") + ylab("Vineland ABC") +
  scale_colour_manual(values = c("dodger blue","#ff8d1e")) + guides(colour=FALSE) +
  theme(text = element_text(size=fontSize),
        axis.text.x = element_text(size=fontSize),
        axis.text.y = element_text(size=fontSize))
p
# Vineland Daily Living Skills: plot the values stored in vabc_dls (labelled
# Cohen's d) against z-threshold (the row names of vabc_dls), one facet per
# melted column.
tmp <- data.frame(vabc_dls)
tmp$threshold <- factor(rownames(tmp))
df2plot <- melt(tmp)
p <- ggplot(data = df2plot,
            aes(x = threshold, y = value, color = variable, group = variable)) +
  facet_grid(. ~ variable) +
  geom_line(size = 4) +
  geom_point(size = 7) +
  ylab("Cohen's d") +
  xlab("Z-threshold") +
  ylim(-0.1, 1) +
  scale_colour_manual(values = c("dodger blue", "#ff8d1e")) +
  guides(color = FALSE) +
  theme(text = element_text(size = fontSize),
        axis.text.x = element_text(size = fontSize),
        axis.text.y = element_text(size = fontSize))
p
#------------------------------------------------------------------------------
# model effect of z difference score on Vineland Daily Living Skills
# For each dataset: fit y_var ~ z_ds with a random intercept for Centre, keep
# the t-statistic for z_ds and the number of non-missing observations, then
# combine Discovery and Replication in a replication Bayes factor.
y_var <- "vabsdscoresd_dss"
# Discovery
df4mod <- subset(data2write, data2write$dataset=="Discovery")
# add each subject's z_ds from tmp_test (inner join on subid)
df4mod <- merge(df4mod, tmp_test[,c("subid","z_ds")], by = "subid")
# 999 is recoded to missing. %in% never returns NA, so this also works when
# the column already contains NA (an NA row index makes `[<-` on a data frame
# fail).
df4mod[df4mod[[y_var]] %in% 999, y_var] <- NA
# df4mod[df4mod[,y_var]==777,y_var] = NA
n_disc <- sum(!is.na(df4mod[,y_var]))
# construct linear model
# mixed-effect model: Centre as random intercept, z_ds as the fixed effect
fx_form <- as.formula(sprintf("%s ~ %s",y_var,"z_ds"))
rx_form <- as.formula(sprintf("~ 1|%s","Centre"))
mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
# run ANOVA
res <- anova(mod2use)
tres <- summary(mod2use)
t_disc <- tres$tTable["z_ds","t-value"]
res
## numDF denDF F-value p-value
## (Intercept) 1 117 392.4521 <.0001
## z_ds 1 117 1.0061 0.3179
cor.test(df4mod[,y_var],df4mod$z_ds)
##
## Pearson's product-moment correlation
##
## data: df4mod[, y_var] and df4mod$z_ds
## t = -0.5396, df = 120, p-value = 0.5905
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2249921 0.1296964
## sample estimates:
## cor
## -0.04919883
# Replication
df4mod <- subset(data2write, data2write$dataset=="Replication")
df4mod <- merge(df4mod, tmp_test[,c("subid","z_ds")], by = "subid")
# NA-safe recode of the 999 missing code (see Discovery above)
df4mod[df4mod[[y_var]] %in% 999, y_var] <- NA
# df4mod[df4mod[,y_var]==777,y_var] = NA
n_rep <- sum(!is.na(df4mod[,y_var]))
# construct linear model
# mixed-effect model: Centre as random intercept, z_ds as the fixed effect
fx_form <- as.formula(sprintf("%s ~ %s",y_var,"z_ds"))
rx_form <- as.formula(sprintf("~ 1|%s","Centre"))
mod2use <- eval(substitute(lme(fixed = fx_form, random = rx_form, data = df4mod, na.action = na.omit)))
# run ANOVA
res <- anova(mod2use)
tres <- summary(mod2use)
t_rep <- tres$tTable["z_ds","t-value"]
res
## numDF denDF F-value p-value
## (Intercept) 1 115 326.2800 <.0001
## z_ds 1 115 13.6755 3e-04
cor.test(df4mod[,y_var],df4mod$z_ds)
##
## Pearson's product-moment correlation
##
## data: df4mod[, y_var] and df4mod$z_ds
## t = -3.4038, df = 118, p-value = 0.0009085
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4539225 -0.1265501
## sample estimates:
## cor
## -0.299009
# replication Bayes Factor
res_bf <- BFSALL(tobs = t_disc,
                 trep = t_rep,
                 n1 = n_disc,
                 n2 = n_rep,
                 sample = 1,
                 Type = 'ALL')
res_bf["Replication BF","Replication 1"]
## [1] 79.65693
# plot scatterplot of z_ds by Vineland Daily Living Skills
# Uses y_var as set for the Daily Living Skills model ("vabsdscoresd_dss").
df4plot <- subset(data2write)
# add each subject's z_ds from tmp_test (inner join on subid)
df4plot <- merge(df4plot, tmp_test[,c("subid","z_ds")], by = "subid")
# 999 is recoded to missing. %in% never returns NA, so this also works when
# the column already contains NA (an NA row index makes `[<-` on a data frame
# fail).
df4plot[df4plot[[y_var]] %in% 999, y_var] <- NA
# df4plot[df4plot[,y_var]==777,y_var] = NA
p <- ggplot(data=df4plot, aes(x = z_ds, y = vabsdscoresd_dss, colour = dataset)) + facet_grid(. ~ dataset)
p <- p + geom_point(size=3) + geom_smooth(method=lm) + xlab("Z SC-RRB") + ylab("Vineland Daily Living Skills") +
  scale_colour_manual(values = c("dodger blue","#ff8d1e")) + guides(colour=FALSE) +
  theme(text = element_text(size=fontSize),
        axis.text.x = element_text(size=fontSize),
        axis.text.y = element_text(size=fontSize))
p